diff options
Diffstat (limited to 'library/std_detect')
51 files changed, 5077 insertions, 0 deletions
diff --git a/library/std_detect/Cargo.toml b/library/std_detect/Cargo.toml new file mode 100644 index 00000000000..8d91454726b --- /dev/null +++ b/library/std_detect/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "std_detect" +version = "0.1.5" +authors = [ + "Alex Crichton <alex@alexcrichton.com>", + "Andrew Gallant <jamslam@gmail.com>", + "Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>", +] +description = "`std::detect` - Rust's standard library run-time CPU feature detection." +homepage = "https://github.com/rust-lang/stdarch" +repository = "https://github.com/rust-lang/stdarch" +readme = "README.md" +keywords = ["std", "run-time", "feature", "detection"] +categories = ["hardware-support"] +license = "MIT OR Apache-2.0" +edition = "2024" + +[badges] +is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" } +is-it-maintained-open-issues = { repository = "rust-lang/stdarch" } +maintenance = { status = "experimental" } + +[dependencies] +cfg-if = "1.0.0" +core = { path = "../core" } +alloc = { path = "../alloc" } + +[target.'cfg(not(windows))'.dependencies] +libc = { version = "0.2.0", optional = true, default-features = false } + +[features] +default = [] +std_detect_file_io = [ "libc" ] +std_detect_dlsym_getauxval = [ "libc" ] +std_detect_env_override = [ "libc" ] diff --git a/library/std_detect/README.md b/library/std_detect/README.md new file mode 100644 index 00000000000..edc90d319a1 --- /dev/null +++ b/library/std_detect/README.md @@ -0,0 +1,93 @@ +`std::detect` - Rust's standard library run-time CPU feature detection +======= + +The private `std::detect` module implements run-time feature detection in Rust's +standard library. This allows detecting whether the CPU the binary runs on +supports certain features, like SIMD instructions. + +# Usage + +`std::detect` APIs are available as part of `libstd`. Prefer using it via the +standard library than through this crate. Unstable features of `std::detect` are +available on nightly Rust behind various feature-gates. + +If you need run-time feature detection in `#[no_std]` environments, Rust `core` +library cannot help you. By design, Rust `core` is platform independent, but +performing run-time feature detection requires a certain level of cooperation +from the platform. + +You can then manually include `std_detect` as a dependency to get similar +run-time feature detection support than the one offered by Rust's standard +library. We intend to make `std_detect` more flexible and configurable in this +regard to better serve the needs of `#[no_std]` targets. + +# Features + +* `std_detect_dlsym_getauxval` (enabled by default, requires `libc`): Enable to +use `libc::dlsym` to query whether [`getauxval`] is linked into the binary. When +this is not the case, this feature allows other fallback methods to perform +run-time feature detection. When this feature is disabled, `std_detect` assumes +that [`getauxval`] is linked to the binary. If that is not the case the behavior +is undefined. + + Note: This feature is ignored on `*-linux-{gnu,musl,ohos}*` and `*-android*` targets + because we can safely assume `getauxval` is linked to the binary. + * `*-linux-gnu*` targets ([since Rust 1.64](https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html)) + have glibc requirements higher than [glibc 2.16 that added `getauxval`](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html). + * `*-linux-musl*` targets ([at least since Rust 1.15](https://github.com/rust-lang/rust/blob/1.15.0/src/ci/docker/x86_64-musl/build-musl.sh#L15)) + use musl newer than [musl 1.1.0 that added `getauxval`](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197) + * `*-linux-ohos*` targets use a [fork of musl 1.2](https://gitee.com/openharmony/docs/blob/master/en/application-dev/reference/native-lib/musl.md) + * `*-android*` targets ([since Rust 1.68](https://blog.rust-lang.org/2023/01/09/android-ndk-update-r25.html)) + have the minimum supported API level higher than [Android 4.3 (API level 18) that added `getauxval`](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49). + +* `std_detect_file_io` (enabled by default, requires `std`): Enable to perform run-time feature +detection using file APIs (e.g. `/proc/self/auxv`, etc.) if other more performant +methods fail. This feature requires `libstd` as a dependency, preventing the +crate from working on applications in which `std` is not available. + +[`getauxval`]: https://man7.org/linux/man-pages/man3/getauxval.3.html + +# Platform support + +* All `x86`/`x86_64` targets are supported on all platforms by querying the + `cpuid` instruction directly for the features supported by the hardware and + the operating system. `std_detect` assumes that the binary is an user-space + application. + +* Linux/Android: + * `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`, `loongarch{32,64}`, `s390x`: + `std_detect` supports these on Linux by querying ELF auxiliary vectors (using `getauxval` + when available), and if that fails, by querying `/proc/self/auxv`. + * `arm64`: partial support for doing run-time feature detection by directly + querying `mrs` is implemented for Linux >= 4.11, but not enabled by default. + * `riscv{32,64}`: + `std_detect` supports these on Linux by querying `riscv_hwprobe`, and + by querying ELF auxiliary vectors (using `getauxval` when available). + +* FreeBSD: + * `arm32`, `powerpc64`: `std_detect` supports these on FreeBSD by querying ELF + auxiliary vectors using `sysctl`. + * `arm64`: run-time feature detection is implemented by directly querying `mrs`. + +* OpenBSD: + * `arm64`: run-time feature detection is implemented by querying `sysctl`. + +* Windows: + * `arm64`: run-time feature detection is implemented by querying `IsProcessorFeaturePresent`. + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + +# Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in `std_detect` by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. diff --git a/library/std_detect/src/detect/arch/aarch64.rs b/library/std_detect/src/detect/arch/aarch64.rs new file mode 100644 index 00000000000..13570a25c1c --- /dev/null +++ b/library/std_detect/src/detect/arch/aarch64.rs @@ -0,0 +1,259 @@ +//! Aarch64 run-time features. + +features! { + @TARGET: aarch64; + @CFG: any(target_arch = "aarch64", target_arch = "arm64ec"); + @MACRO_NAME: is_aarch64_feature_detected; + @MACRO_ATTRS: + /// This macro tests, at runtime, whether an `aarch64` feature is enabled on aarch64 platforms. + /// Currently most features are only supported on linux-based platforms. + /// + /// This macro takes one argument which is a string literal of the feature being tested for. + /// The feature names are mostly taken from their FEAT_* definitions in the [ARM Architecture + /// Reference Manual][docs]. + /// + /// ## Supported arguments + /// + /// * `"aes"` - FEAT_AES & FEAT_PMULL + /// * `"asimd"` or "neon" - FEAT_AdvSIMD + /// * `"bf16"` - FEAT_BF16 + /// * `"bti"` - FEAT_BTI + /// * `"crc"` - FEAT_CRC + /// * `"cssc"` - FEAT_CSSC + /// * `"dit"` - FEAT_DIT + /// * `"dotprod"` - FEAT_DotProd + /// * `"dpb"` - FEAT_DPB + /// * `"dpb2"` - FEAT_DPB2 + /// * `"ecv"` - FEAT_ECV + /// * `"f32mm"` - FEAT_F32MM + /// * `"f64mm"` - FEAT_F64MM + /// * `"faminmax"` - FEAT_FAMINMAX + /// * `"fcma"` - FEAT_FCMA + /// * `"fhm"` - FEAT_FHM + /// * `"flagm"` - FEAT_FLAGM + /// * `"flagm2"` - FEAT_FLAGM2 + /// * `"fp"` - FEAT_FP + /// * `"fp16"` - FEAT_FP16 + /// * `"fp8"` - FEAT_FP8 + /// * `"fp8dot2"` - FEAT_FP8DOT2 + /// * `"fp8dot4"` - FEAT_FP8DOT4 + /// * `"fp8fma"` - FEAT_FP8FMA + /// * `"fpmr"` - FEAT_FPMR + /// * `"frintts"` - FEAT_FRINTTS + /// * `"hbc"` - FEAT_HBC + /// * `"i8mm"` - FEAT_I8MM + /// * `"jsconv"` - FEAT_JSCVT + /// * `"lse"` - FEAT_LSE + /// * `"lse128"` - FEAT_LSE128 + /// * `"lse2"` - FEAT_LSE2 + /// * `"lut"` - FEAT_LUT + /// * `"mops"` - FEAT_MOPS + /// * `"mte"` - FEAT_MTE & FEAT_MTE2 + /// * `"paca"` - FEAT_PAuth (address authentication) + /// * `"pacg"` - FEAT_Pauth (generic authentication) + /// * `"pauth-lr"` - FEAT_PAuth_LR + /// * `"pmull"` - FEAT_PMULL + /// * `"rand"` - FEAT_RNG + /// * `"rcpc"` - FEAT_LRCPC + /// * `"rcpc2"` - FEAT_LRCPC2 + /// * `"rcpc3"` - FEAT_LRCPC3 + /// * `"rdm"` - FEAT_RDM + /// * `"sb"` - FEAT_SB + /// * `"sha2"` - FEAT_SHA1 & FEAT_SHA256 + /// * `"sha3"` - FEAT_SHA512 & FEAT_SHA3 + /// * `"sm4"` - FEAT_SM3 & FEAT_SM4 + /// * `"sme"` - FEAT_SME + /// * `"sme-b16b16"` - FEAT_SME_B16B16 + /// * `"sme-f16f16"` - FEAT_SME_F16F16 + /// * `"sme-f64f64"` - FEAT_SME_F64F64 + /// * `"sme-f8f16"` - FEAT_SME_F8F16 + /// * `"sme-f8f32"` - FEAT_SME_F8F32 + /// * `"sme-fa64"` - FEAT_SME_FA64 + /// * `"sme-i16i64"` - FEAT_SME_I16I64 + /// * `"sme-lutv2"` - FEAT_SME_LUTv2 + /// * `"sme2"` - FEAT_SME2 + /// * `"sme2p1"` - FEAT_SME2p1 + /// * `"ssbs"` - FEAT_SSBS & FEAT_SSBS2 + /// * `"ssve-fp8dot2"` - FEAT_SSVE_FP8DOT2 + /// * `"ssve-fp8dot4"` - FEAT_SSVE_FP8DOT4 + /// * `"ssve-fp8fma"` - FEAT_SSVE_FP8FMA + /// * `"sve"` - FEAT_SVE + /// * `"sve-b16b16"` - FEAT_SVE_B16B16 (SVE or SME Z-targeting instructions) + /// * `"sve2"` - FEAT_SVE2 + /// * `"sve2-aes"` - FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) + /// * `"sve2-bitperm"` - FEAT_SVE2_BitPerm + /// * `"sve2-sha3"` - FEAT_SVE2_SHA3 + /// * `"sve2-sm4"` - FEAT_SVE2_SM4 + /// * `"sve2p1"` - FEAT_SVE2p1 + /// * `"tme"` - FEAT_TME + /// * `"wfxt"` - FEAT_WFxT + /// + /// [docs]: https://developer.arm.com/documentation/ddi0487/latest + #[stable(feature = "simd_aarch64", since = "1.60.0")] + @BIND_FEATURE_NAME: "asimd"; "neon"; + @NO_RUNTIME_DETECTION: "ras"; + @NO_RUNTIME_DETECTION: "v8.1a"; + @NO_RUNTIME_DETECTION: "v8.2a"; + @NO_RUNTIME_DETECTION: "v8.3a"; + @NO_RUNTIME_DETECTION: "v8.4a"; + @NO_RUNTIME_DETECTION: "v8.5a"; + @NO_RUNTIME_DETECTION: "v8.6a"; + @NO_RUNTIME_DETECTION: "v8.7a"; + @NO_RUNTIME_DETECTION: "v8.8a"; + @NO_RUNTIME_DETECTION: "v8.9a"; + @NO_RUNTIME_DETECTION: "v9.1a"; + @NO_RUNTIME_DETECTION: "v9.2a"; + @NO_RUNTIME_DETECTION: "v9.3a"; + @NO_RUNTIME_DETECTION: "v9.4a"; + @NO_RUNTIME_DETECTION: "v9.5a"; + @NO_RUNTIME_DETECTION: "v9a"; + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] asimd: "neon"; + /// FEAT_AdvSIMD (Advanced SIMD/NEON) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pmull: "pmull"; + implied by target_features: ["aes"]; + /// FEAT_PMULL (Polynomial Multiply) - Implied by `aes` target_feature + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp: "fp"; + implied by target_features: ["neon"]; + /// FEAT_FP (Floating point support) - Implied by `neon` target_feature + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] aes: "aes"; + /// FEAT_AES (AES SIMD instructions) & FEAT_PMULL (PMULL{2}, 64-bit operand variants) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bf16: "bf16"; + /// FEAT_BF16 (BFloat16 type, plus MM instructions, plus ASIMD support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bti: "bti"; + /// FEAT_BTI (Branch Target Identification) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] crc: "crc"; + /// FEAT_CRC32 (Cyclic Redundancy Check) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] cssc: "cssc"; + /// FEAT_CSSC (Common Short Sequence Compression instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dit: "dit"; + /// FEAT_DIT (Data Independent Timing instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb: "dpb"; + /// FEAT_DPB (aka dcpop - data cache clean to point of persistence) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb2: "dpb2"; + /// FEAT_DPB2 (aka dcpodp - data cache clean to point of deep persistence) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dotprod: "dotprod"; + /// FEAT_DotProd (Vector Dot-Product - ASIMDDP) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ecv: "ecv"; + /// FEAT_ECV (Enhanced Counter Virtualization) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f32mm: "f32mm"; + /// FEAT_F32MM (single-precision matrix multiplication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f64mm: "f64mm"; + /// FEAT_F64MM (double-precision matrix multiplication) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] faminmax: "faminmax"; + /// FEAT_FAMINMAX (FAMIN and FAMAX SIMD/SVE/SME instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fcma: "fcma"; + /// FEAT_FCMA (float complex number operations) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fhm: "fhm"; + /// FEAT_FHM (fp16 multiplication instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] flagm: "flagm"; + /// FEAT_FLAGM (flag manipulation instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] flagm2: "flagm2"; + /// FEAT_FLAGM2 (flag manipulation instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp16: "fp16"; + /// FEAT_FP16 (Half-float support) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8: "fp8"; + /// FEAT_FP8 (F8CVT Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot2: "fp8dot2"; + /// FEAT_FP8DOT2 (F8DP2 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot4: "fp8dot4"; + /// FEAT_FP8DOT4 (F8DP4 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8fma: "fp8fma"; + /// FEAT_FP8FMA (F8FMA Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fpmr: "fpmr"; + without cfg check: true; + /// FEAT_FPMR (Special-purpose AArch64-FPMR register) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] frintts: "frintts"; + /// FEAT_FRINTTS (float to integer rounding instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] hbc: "hbc"; + /// FEAT_HBC (Hinted conditional branches) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] i8mm: "i8mm"; + /// FEAT_I8MM (integer matrix multiplication, plus ASIMD support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] jsconv: "jsconv"; + /// FEAT_JSCVT (JavaScript float conversion instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse: "lse"; + /// FEAT_LSE (Large System Extension - atomics) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lse128: "lse128"; + /// FEAT_LSE128 (128-bit atomics) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse2: "lse2"; + /// FEAT_LSE2 (unaligned and register-pair atomics) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lut: "lut"; + /// FEAT_LUT (Lookup Table Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] mops: "mops"; + /// FEAT_MOPS (Standardization of memory operations) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] mte: "mte"; + /// FEAT_MTE & FEAT_MTE2 (Memory Tagging Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] paca: "paca"; + /// FEAT_PAuth (address authentication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pacg: "pacg"; + /// FEAT_PAuth (generic authentication) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] pauth_lr: "pauth-lr"; + /// FEAT_PAuth_LR + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rand: "rand"; + /// FEAT_RNG (Random Number Generator) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc: "rcpc"; + /// FEAT_LRCPC (Release consistent Processor consistent) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc2: "rcpc2"; + /// FEAT_LRCPC2 (RCPC with immediate offsets) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] rcpc3: "rcpc3"; + /// FEAT_LRCPC3 (RCPC Instructions v3) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rdm: "rdm"; + /// FEAT_RDM (Rounding Doubling Multiply - ASIMDRDM) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sb: "sb"; + /// FEAT_SB (speculation barrier) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha2: "sha2"; + /// FEAT_SHA1 & FEAT_SHA256 (SHA1 & SHA2-256 instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha3: "sha3"; + /// FEAT_SHA512 & FEAT_SHA3 (SHA2-512 & SHA3 instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sm4: "sm4"; + /// FEAT_SM3 & FEAT_SM4 (SM3 & SM4 instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme: "sme"; + /// FEAT_SME (Scalable Matrix Extension) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2: "sme2"; + /// FEAT_SME2 (SME Version 2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2p1: "sme2p1"; + /// FEAT_SME2p1 (SME Version 2.1) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_b16b16: "sme-b16b16"; + /// FEAT_SME_B16B16 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f16f16: "sme-f16f16"; + /// FEAT_SME_F16F16 (Non-widening half-precision FP16 to FP16 arithmetic for SME2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f64f64: "sme-f64f64"; + /// FEAT_SME_F64F64 (Double-precision floating-point outer product instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f16: "sme-f8f16"; + /// FEAT_SME_F8F16 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f32: "sme-f8f32"; + /// FEAT_SME_F8F32 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_fa64: "sme-fa64"; + /// FEAT_SME_FA64 (Full A64 instruction set support in Streaming SVE mode) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_i16i64: "sme-i16i64"; + /// FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_lutv2: "sme-lutv2"; + /// FEAT_SME_LUTv2 (LUTI4 Instruction) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] ssbs: "ssbs"; + /// FEAT_SSBS & FEAT_SSBS2 (speculative store bypass safe) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot2: "ssve-fp8dot2"; + /// FEAT_SSVE_FP8DOT2 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot4: "ssve-fp8dot4"; + /// FEAT_SSVE_FP8DOT4 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8fma: "ssve-fp8fma"; + /// FEAT_SSVE_FP8FMA + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve: "sve"; + /// FEAT_SVE (Scalable Vector Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2: "sve2"; + /// FEAT_SVE2 (Scalable Vector Extension 2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve2p1: "sve2p1"; + /// FEAT_SVE2p1 (Scalable Vector Extension 2.1) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_aes: "sve2-aes"; + /// FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve_b16b16: "sve-b16b16"; + /// FEAT_SVE_B16B16 (SVE or SME Z-targeting instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_bitperm: "sve2-bitperm"; + /// FEAT_SVE_BitPerm (SVE2 bit permutation instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sha3: "sve2-sha3"; + /// FEAT_SVE_SHA3 (SVE2 SHA3 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sm4: "sve2-sm4"; + /// FEAT_SVE_SM4 (SVE2 SM4 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] tme: "tme"; + /// FEAT_TME (Transactional Memory Extensions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt"; + /// FEAT_WFxT (WFET and WFIT Instructions) +} diff --git a/library/std_detect/src/detect/arch/arm.rs b/library/std_detect/src/detect/arch/arm.rs new file mode 100644 index 00000000000..c3c8883ce31 --- /dev/null +++ b/library/std_detect/src/detect/arch/arm.rs @@ -0,0 +1,29 @@ +//! Run-time feature detection on ARM Aarch32. + +features! { + @TARGET: arm; + @CFG: target_arch = "arm"; + @MACRO_NAME: is_arm_feature_detected; + @MACRO_ATTRS: + /// Checks if `arm` feature is enabled. + #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] + @NO_RUNTIME_DETECTION: "v7"; + @NO_RUNTIME_DETECTION: "vfp2"; + @NO_RUNTIME_DETECTION: "vfp3"; + @NO_RUNTIME_DETECTION: "vfp4"; + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] neon: "neon"; + /// ARM Advanced SIMD (NEON) - Aarch32 + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] pmull: "pmull"; + without cfg check: true; + /// Polynomial Multiply + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] crc: "crc"; + /// CRC32 (Cyclic Redundancy Check) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] aes: "aes"; + /// FEAT_AES (AES instructions) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] sha2: "sha2"; + /// FEAT_SHA1 & FEAT_SHA256 (SHA1 & SHA2-256 instructions) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] i8mm: "i8mm"; + /// FEAT_I8MM (integer matrix multiplication, plus ASIMD support) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] dotprod: "dotprod"; + /// FEAT_DotProd (Vector Dot-Product - ASIMDDP) +} diff --git a/library/std_detect/src/detect/arch/loongarch.rs b/library/std_detect/src/detect/arch/loongarch.rs new file mode 100644 index 00000000000..68fc600fa8e --- /dev/null +++ b/library/std_detect/src/detect/arch/loongarch.rs @@ -0,0 +1,51 @@ +//! Run-time feature detection on LoongArch. + +features! { + @TARGET: loongarch; + @CFG: any(target_arch = "loongarch32", target_arch = "loongarch64"); + @MACRO_NAME: is_loongarch_feature_detected; + @MACRO_ATTRS: + /// Checks if `loongarch` feature is enabled. + /// Supported arguments are: + /// + /// * `"f"` + /// * `"d"` + /// * `"frecipe"` + /// * `"div32"` + /// * `"lsx"` + /// * `"lasx"` + /// * `"lam-bh"` + /// * `"lamcas"` + /// * `"ld-seq-sa"` + /// * `"scq"` + /// * `"lbt"` + /// * `"lvz"` + /// * `"ual"` + #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] f: "f"; + /// F + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] d: "d"; + /// D + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] frecipe: "frecipe"; + /// Frecipe + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] div32: "div32"; + /// Div32 + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lsx: "lsx"; + /// LSX + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lasx: "lasx"; + /// LASX + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] lam_bh: "lam-bh"; + /// LAM-BH + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] lamcas: "lamcas"; + /// LAM-CAS + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] ld_seq_sa: "ld-seq-sa"; + /// LD-SEQ-SA + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] scq: "scq"; + /// SCQ + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lbt: "lbt"; + /// LBT + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lvz: "lvz"; + /// LVZ + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] ual: "ual"; + /// UAL +} diff --git a/library/std_detect/src/detect/arch/mips.rs b/library/std_detect/src/detect/arch/mips.rs new file mode 100644 index 00000000000..e185fdfcaac --- /dev/null +++ b/library/std_detect/src/detect/arch/mips.rs @@ -0,0 +1,12 @@ +//! Run-time feature detection on MIPS. + +features! { + @TARGET: mips; + @CFG: target_arch = "mips"; + @MACRO_NAME: is_mips_feature_detected; + @MACRO_ATTRS: + /// Checks if `mips` feature is enabled. + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + @FEATURE: #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] msa: "msa"; + /// MIPS SIMD Architecture (MSA) +} diff --git a/library/std_detect/src/detect/arch/mips64.rs b/library/std_detect/src/detect/arch/mips64.rs new file mode 100644 index 00000000000..69fe4869d30 --- /dev/null +++ b/library/std_detect/src/detect/arch/mips64.rs @@ -0,0 +1,12 @@ +//! Run-time feature detection on MIPS64. + +features! { + @TARGET: mips64; + @CFG: target_arch = "mips64"; + @MACRO_NAME: is_mips64_feature_detected; + @MACRO_ATTRS: + /// Checks if `mips64` feature is enabled. + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + @FEATURE: #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] msa: "msa"; + /// MIPS SIMD Architecture (MSA) +} diff --git a/library/std_detect/src/detect/arch/mod.rs b/library/std_detect/src/detect/arch/mod.rs new file mode 100644 index 00000000000..b0be554ed89 --- /dev/null +++ b/library/std_detect/src/detect/arch/mod.rs @@ -0,0 +1,75 @@ +#![allow(dead_code)] + +use cfg_if::cfg_if; + +// Export the macros for all supported architectures. +#[macro_use] +mod x86; +#[macro_use] +mod arm; +#[macro_use] +mod aarch64; +#[macro_use] +mod riscv; +#[macro_use] +mod powerpc; +#[macro_use] +mod powerpc64; +#[macro_use] +mod mips; +#[macro_use] +mod mips64; +#[macro_use] +mod loongarch; +#[macro_use] +mod s390x; + +cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[stable(feature = "simd_x86", since = "1.27.0")] + pub use x86::*; + } else if #[cfg(target_arch = "arm")] { + #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] + pub use arm::*; + } else if #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { + #[stable(feature = "simd_aarch64", since = "1.60.0")] + pub use aarch64::*; + } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] { + #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] + pub use riscv::*; + } else if #[cfg(target_arch = "powerpc")] { + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + pub use powerpc::*; + } else if #[cfg(target_arch = "powerpc64")] { + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + pub use powerpc64::*; + } else if #[cfg(target_arch = "mips")] { + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + pub use mips::*; + } else if #[cfg(target_arch = "mips64")] { + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + pub use mips64::*; + } else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] { + #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] + pub use loongarch::*; + } else if #[cfg(target_arch = "s390x")] { + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + pub use s390x::*; + } else { + // Unimplemented architecture: + #[doc(hidden)] + pub(crate) enum Feature { + Null + } + #[doc(hidden)] + #[unstable(feature = "stdarch_internal", issue = "none")] + pub mod __is_feature_detected {} + + impl Feature { + #[doc(hidden)] + pub(crate) fn from_str(_s: &str) -> Result<Feature, ()> { Err(()) } + #[doc(hidden)] + pub(crate) fn to_str(self) -> &'static str { "" } + } + } +} diff --git a/library/std_detect/src/detect/arch/powerpc.rs b/library/std_detect/src/detect/arch/powerpc.rs new file mode 100644 index 00000000000..c390993a48a --- /dev/null +++ b/library/std_detect/src/detect/arch/powerpc.rs @@ -0,0 +1,30 @@ +//! Run-time feature detection on PowerPC. + +features! { + @TARGET: powerpc; + @CFG: target_arch = "powerpc"; + @MACRO_NAME: is_powerpc_feature_detected; + @MACRO_ATTRS: + /// Checks if `powerpc` feature is enabled. + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] altivec: "altivec"; + /// Altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] vsx: "vsx"; + /// VSX + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8: "power8"; + without cfg check: true; + /// Power8 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_altivec: "power8-altivec"; + /// Power8 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_vector: "power8-vector"; + /// Power8 vector + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_crypto: "power8-crypto"; + /// Power8 crypto + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9: "power9"; + without cfg check: true; + /// Power9 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_altivec: "power9-altivec"; + /// Power9 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_vector: "power9-vector"; + /// Power9 vector +} diff --git a/library/std_detect/src/detect/arch/powerpc64.rs b/library/std_detect/src/detect/arch/powerpc64.rs new file mode 100644 index 00000000000..cf05baa6f79 --- /dev/null +++ b/library/std_detect/src/detect/arch/powerpc64.rs @@ -0,0 +1,30 @@ +//! Run-time feature detection on PowerPC64. + +features! { + @TARGET: powerpc64; + @CFG: target_arch = "powerpc64"; + @MACRO_NAME: is_powerpc64_feature_detected; + @MACRO_ATTRS: + /// Checks if `powerpc` feature is enabled. + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] altivec: "altivec"; + /// Altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] vsx: "vsx"; + /// VSX + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8: "power8"; + without cfg check: true; + /// Power8 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_altivec: "power8-altivec"; + /// Power8 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_vector: "power8-vector"; + /// Power8 vector + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_crypto: "power8-crypto"; + /// Power8 crypto + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9: "power9"; + without cfg check: true; + /// Power9 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_altivec: "power9-altivec"; + /// Power9 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_vector: "power9-vector"; + /// Power9 vector +} diff --git a/library/std_detect/src/detect/arch/riscv.rs b/library/std_detect/src/detect/arch/riscv.rs new file mode 100644 index 00000000000..b86190d7bbf --- /dev/null +++ b/library/std_detect/src/detect/arch/riscv.rs @@ -0,0 +1,344 @@ +//! Run-time feature detection on RISC-V. + +features! { + @TARGET: riscv; + @CFG: any(target_arch = "riscv32", target_arch = "riscv64"); + @MACRO_NAME: is_riscv_feature_detected; + @MACRO_ATTRS: + /// A macro to test at *runtime* whether instruction sets are available on + /// RISC-V platforms. + /// + /// RISC-V standard defined the base sets and the extension sets. + /// The base sets are RV32I, RV64I, RV32E or RV128I. Any RISC-V platform + /// must support one base set and/or multiple extension sets. + /// + /// Any RISC-V standard instruction sets can be in state of either ratified, + /// frozen or draft. The version and status of current standard instruction + /// sets can be checked out from preface section of the [ISA manual]. + /// + /// Platform may define and support their own custom instruction sets with + /// ISA prefix X. These sets are highly platform specific and should be + /// detected with their own platform support crates. + /// + /// [ISA manual]: https://riscv.org/specifications/ratified/ + /// + /// # Platform-specific/agnostic Behavior and Availability + /// + /// Runtime detection depends on the platform-specific feature detection + /// facility and its availability per feature is + /// highly platform/version-specific. + /// + /// Still, a best-effort attempt is performed to enable subset/dependent + /// features if a superset feature is enabled regardless of the platform. + /// For instance, if the A extension (`"a"`) is enabled, its subsets (the + /// Zalrsc and Zaamo extensions; `"zalrsc"` and `"zaamo"`) are also enabled. + /// Likewise, if the F extension (`"f"`) is enabled, one of its dependencies + /// (the Zicsr extension `"zicsr"`) is also enabled. + /// + /// # Unprivileged Specification + /// + /// The supported ratified RISC-V instruction sets are as follows: + /// + /// * RV32E: `"rv32e"` + /// * RV32I: `"rv32i"` + /// * RV64I: `"rv64i"` + /// * A: `"a"` + /// * Zaamo: `"zaamo"` + /// * Zalrsc: `"zalrsc"` + /// * B: `"b"` + /// * Zba: `"zba"` + /// * Zbb: `"zbb"` + /// * Zbs: `"zbs"` + /// * C: `"c"` + /// * Zca: `"zca"` + /// * Zcd: `"zcd"` (if D is enabled) + /// * Zcf: `"zcf"` (if F is enabled on RV32) + /// * D: `"d"` + /// * F: `"f"` + /// * M: `"m"` + /// * Q: `"q"` + /// * V: `"v"` + /// * Zve32x: `"zve32x"` + /// * Zve32f: `"zve32f"` + /// * Zve64x: `"zve64x"` + /// * Zve64f: `"zve64f"` + /// * Zve64d: `"zve64d"` + /// * Zicbom: `"zicbom"` + /// * Zicboz: `"zicboz"` + /// * Zicntr: `"zicntr"` + /// * Zicond: `"zicond"` + /// * Zicsr: `"zicsr"` + /// * Zifencei: `"zifencei"` + /// * Zihintntl: `"zihintntl"` + /// * Zihintpause: `"zihintpause"` + /// * Zihpm: `"zihpm"` + /// * Zimop: `"zimop"` + /// * Zacas: `"zacas"` + /// * Zawrs: `"zawrs"` + /// * Zfa: `"zfa"` + /// * Zfbfmin: `"zfbfmin"` + /// * Zfh: `"zfh"` + /// * Zfhmin: `"zfhmin"` + /// * Zfinx: `"zfinx"` + /// * Zdinx: `"zdinx"` + /// * Zhinx: `"zhinx"` + /// * Zhinxmin: `"zhinxmin"` + /// * Zcb: `"zcb"` + /// * Zcmop: `"zcmop"` + /// * Zbc: `"zbc"` + /// * Zbkb: `"zbkb"` + /// * Zbkc: `"zbkc"` + /// * Zbkx: `"zbkx"` + /// * Zk: `"zk"` + /// * Zkn: `"zkn"` + /// * Zknd: `"zknd"` + /// * Zkne: `"zkne"` + /// * Zknh: `"zknh"` + /// * Zkr: `"zkr"` + /// * Zks: `"zks"` + /// * Zksed: `"zksed"` + /// * Zksh: `"zksh"` + /// * Zkt: `"zkt"` + /// * Zvbb: `"zvbb"` + /// * Zvbc: `"zvbc"` + /// * Zvfbfmin: `"zvfbfmin"` + /// * Zvfbfwma: `"zvfbfwma"` + /// * Zvfh: `"zvfh"` + /// * Zvfhmin: `"zvfhmin"` + /// * Zvkb: `"zvkb"` + /// * Zvkg: `"zvkg"` + /// * Zvkn: `"zvkn"` + /// * Zvkned: `"zvkned"` + /// * Zvknha: `"zvknha"` + /// * Zvknhb: `"zvknhb"` + /// * Zvknc: `"zvknc"` + /// * Zvkng: `"zvkng"` + /// * Zvks: `"zvks"` + /// * Zvksed: `"zvksed"` + /// * Zvksh: `"zvksh"` + /// * Zvksc: `"zvksc"` + /// * Zvksg: `"zvksg"` + /// * Zvkt: `"zvkt"` + /// * Ztso: `"ztso"` + /// + /// There's also bases and extensions marked as standard instruction set, + /// but they are in frozen or draft state. These instruction sets are also + /// reserved by this macro and can be detected in the future platforms. + /// + /// Draft RISC-V instruction sets: + /// + /// * RV128I: `"rv128i"` + /// * J: `"j"` + /// * P: `"p"` + /// * Zam: `"zam"` + /// + /// # Performance Hints + /// + /// The two features below define performance hints for unaligned + /// scalar/vector memory accesses, respectively. If enabled, it denotes that + /// corresponding unaligned memory access is reasonably fast. + /// + /// * `"unaligned-scalar-mem"` + /// * Runtime detection requires Linux kernel version 6.4 or later. + /// * `"unaligned-vector-mem"` + /// * Runtime detection requires Linux kernel version 6.13 or later. + #[stable(feature = "riscv_ratified", since = "1.78.0")] + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv32i: "rv32i"; + without cfg check: true; + /// RV32I Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv32e: "rv32e"; + without cfg check: true; + /// RV32E Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv64i: "rv64i"; + without cfg check: true; + /// RV64I Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv128i: "rv128i"; + without cfg check: true; + /// RV128I Base Integer Instruction Set + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] unaligned_scalar_mem: "unaligned-scalar-mem"; + /// Has reasonably performant unaligned scalar + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] unaligned_vector_mem: "unaligned-vector-mem"; + /// Has reasonably performant unaligned vector + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicsr: "zicsr"; + /// "Zicsr" Extension for Control and Status Register (CSR) Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicntr: "zicntr"; + /// "Zicntr" Extension for Base Counters and Timers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zihpm: "zihpm"; + /// "Zihpm" Extension for Hardware Performance Counters + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zifencei: "zifencei"; + /// "Zifencei" Extension for Instruction-Fetch Fence + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zihintntl: "zihintntl"; + /// "Zihintntl" Extension for Non-Temporal Locality Hints + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zihintpause: "zihintpause"; + /// "Zihintpause" Extension for Pause Hint + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zimop: "zimop"; + /// "Zimop" Extension for May-Be-Operations + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicbom: "zicbom"; + /// "Zicbom" Extension for Cache-Block Management Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicboz: "zicboz"; + /// "Zicboz" Extension for Cache-Block Zero Instruction + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicond: "zicond"; + /// "Zicond" Extension for Integer Conditional Operations + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] m: "m"; + /// "M" Extension for Integer Multiplication and Division + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] a: "a"; + /// "A" Extension for Atomic Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zalrsc: "zalrsc"; + /// "Zalrsc" Extension for Load-Reserved/Store-Conditional Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zaamo: "zaamo"; + /// "Zaamo" Extension for Atomic Memory Operations + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zawrs: "zawrs"; + /// "Zawrs" Extension for Wait-on-Reservation-Set Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zacas: "zacas"; + /// "Zacas" Extension for Atomic Compare-and-Swap (CAS) Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zam: "zam"; + without cfg check: true; + /// "Zam" Extension for Misaligned Atomics + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] ztso: "ztso"; + /// "Ztso" Extension for Total Store Ordering + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] f: "f"; + /// "F" Extension for Single-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] d: "d"; + /// "D" Extension for Double-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] q: "q"; + without cfg check: true; + /// "Q" Extension for Quad-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfh: "zfh"; + /// "Zfh" Extension for Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfhmin: "zfhmin"; + /// "Zfhmin" Extension for Minimal Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfa: "zfa"; + /// "Zfa" Extension for Additional Floating-Point Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfbfmin: "zfbfmin"; + /// "Zfbfmin" Extension for Scalar BF16 Converts + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfinx: "zfinx"; + /// "Zfinx" Extension for Single-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zdinx: "zdinx"; + /// "Zdinx" Extension for Double-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zhinx: "zhinx"; + /// "Zhinx" Extension for Half-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zhinxmin: "zhinxmin"; + /// "Zhinxmin" Extension for Minimal Half-Precision Floating-Point in Integer Registers + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] c: "c"; + /// "C" Extension for Compressed Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zca: "zca"; + /// "Zca" Compressed Instructions excluding Floating-Point Loads/Stores + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcf: "zcf"; + without cfg check: true; + /// "Zcf" Compressed Instructions for Single-Precision Floating-Point Loads/Stores on RV32 + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcd: "zcd"; + without cfg check: true; + /// "Zcd" Compressed Instructions for Double-Precision Floating-Point Loads/Stores + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcb: "zcb"; + /// "Zcb" Simple Code-size Saving Compressed Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcmop: "zcmop"; + /// "Zcmop" Extension for Compressed May-Be-Operations + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] b: "b"; + /// "B" Extension for Bit Manipulation + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zba: "zba"; + /// "Zba" Extension for Address Generation + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbb: "zbb"; + /// "Zbb" Extension for Basic Bit-Manipulation + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbc: "zbc"; + /// "Zbc" Extension for Carry-less Multiplication + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbs: "zbs"; + /// "Zbs" Extension for Single-Bit Instructions + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbkb: "zbkb"; + /// "Zbkb" Extension for Bit-Manipulation for Cryptography + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbkc: "zbkc"; + /// "Zbkc" Extension for Carry-less Multiplication for Cryptography + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbkx: "zbkx"; + /// "Zbkx" Extension for Crossbar Permutations + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zknd: "zknd"; + /// "Zknd" Cryptography Extension for NIST Suite: AES Decryption + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkne: "zkne"; + /// "Zkne" Cryptography Extension for NIST Suite: AES Encryption + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zknh: "zknh"; + /// "Zknh" Cryptography Extension for NIST Suite: Hash Function Instructions + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zksed: "zksed"; + /// "Zksed" Cryptography Extension for ShangMi Suite: SM4 Block Cipher Instructions + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zksh: "zksh"; + /// "Zksh" Cryptography Extension for ShangMi Suite: SM3 Hash Function Instructions + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkr: "zkr"; + /// "Zkr" Entropy Source Extension + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkn: "zkn"; + /// "Zkn" Cryptography Extension for NIST Algorithm Suite + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zks: "zks"; + /// "Zks" Cryptography Extension for ShangMi Algorithm Suite + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zk: "zk"; + /// "Zk" Cryptography Extension for Standard Scalar Cryptography + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkt: "zkt"; + /// "Zkt" Cryptography Extension for Data Independent Execution Latency + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] v: "v"; + /// "V" Extension for Vector Operations + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve32x: "zve32x"; + /// "Zve32x" Vector Extension for Embedded Processors (32-bit+; Integer) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve32f: "zve32f"; + /// "Zve32f" Vector Extension for Embedded Processors (32-bit+; with Single-Precision Floating-Point) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve64x: "zve64x"; + /// "Zve64x" Vector Extension for Embedded Processors (64-bit+; Integer) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve64f: "zve64f"; + /// "Zve64f" Vector Extension for Embedded Processors (64-bit+; with Single-Precision Floating-Point) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve64d: "zve64d"; + /// "Zve64d" Vector Extension for Embedded Processors (64-bit+; with Double-Precision Floating-Point) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfh: "zvfh"; + /// "Zvfh" Vector Extension for Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfhmin: "zvfhmin"; + /// "Zvfhmin" Vector Extension for Minimal Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfbfmin: "zvfbfmin"; + /// "Zvfbfmin" Vector Extension for BF16 Converts + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfbfwma: "zvfbfwma"; + /// "Zvfbfwma" Vector Extension for BF16 Widening Multiply-Add + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvbb: "zvbb"; + /// "Zvbb" Extension for Vector Basic Bit-Manipulation + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvbc: "zvbc"; + /// "Zvbc" Extension for Vector Carryless Multiplication + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkb: "zvkb"; + /// "Zvkb" Extension for Vector Cryptography Bit-Manipulation + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkg: "zvkg"; + /// "Zvkg" Cryptography Extension for Vector GCM/GMAC + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkned: "zvkned"; + /// "Zvkned" Cryptography Extension for NIST Suite: Vector AES Block Cipher + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvknha: "zvknha"; + /// "Zvknha" Cryptography Extension for Vector SHA-2 Secure Hash (SHA-256) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvknhb: "zvknhb"; + /// "Zvknhb" Cryptography Extension for Vector SHA-2 Secure Hash (SHA-256/512) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksed: "zvksed"; + /// "Zvksed" Cryptography Extension for ShangMi Suite: Vector SM4 Block Cipher + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksh: "zvksh"; + /// "Zvksh" Cryptography Extension for ShangMi Suite: Vector SM3 Secure Hash + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkn: "zvkn"; + /// "Zvkn" Cryptography Extension for NIST Algorithm Suite + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvknc: "zvknc"; + /// "Zvknc" Cryptography Extension for NIST Algorithm Suite with Carryless Multiply + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkng: "zvkng"; + /// "Zvkng" Cryptography Extension for NIST Algorithm Suite with GCM + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvks: "zvks"; + /// "Zvks" Cryptography Extension for ShangMi Algorithm Suite + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksc: "zvksc"; + /// "Zvksc" Cryptography Extension for ShangMi Algorithm Suite with Carryless Multiply + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksg: "zvksg"; + /// "Zvksg" Cryptography Extension for ShangMi Algorithm Suite with GCM + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkt: "zvkt"; + /// "Zvkt" Extension for Vector Data-Independent Execution Latency + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] j: "j"; + without cfg check: true; + /// "J" Extension for Dynamically Translated Languages + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] p: "p"; + without cfg check: true; + /// "P" Extension for Packed-SIMD Instructions +} diff --git a/library/std_detect/src/detect/arch/s390x.rs b/library/std_detect/src/detect/arch/s390x.rs new file mode 100644 index 00000000000..4c20d011680 --- /dev/null +++ b/library/std_detect/src/detect/arch/s390x.rs @@ -0,0 +1,81 @@ +//! Run-time feature detection on s390x. + +features! { + @TARGET: s390x; + @CFG: target_arch = "s390x"; + @MACRO_NAME: is_s390x_feature_detected; + @MACRO_ATTRS: + /// Checks if `s390x` feature is enabled. + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] concurrent_functions: "concurrent-functions"; + /// s390x concurrent-functions facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] deflate_conversion: "deflate-conversion"; + /// s390x deflate-conversion facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] enhanced_sort: "enhanced-sort"; + /// s390x enhanced-sort facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] guarded_storage: "guarded-storage"; + /// s390x guarded-storage facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] high_word: "high-word"; + /// s390x high-word facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension3: "message-security-assist-extension3"; + /// s390x message-security-assist-extension3 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension4: "message-security-assist-extension4"; + /// s390x message-security-assist-extension4 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension5: "message-security-assist-extension5"; + /// s390x message-security-assist-extension5 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension8: "message-security-assist-extension8"; + /// s390x message-security-assist-extension8 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension9: "message-security-assist-extension9"; + /// s390x message-security-assist-extension9 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension12: "message-security-assist-extension12"; + /// s390x message-security-assist-extension12 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_2: "miscellaneous-extensions-2"; + /// s390x miscellaneous-extensions-2 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_3: "miscellaneous-extensions-3"; + /// s390x miscellaneous-extensions-3 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_4: "miscellaneous-extensions-4"; + /// s390x miscellaneous-extensions-4 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] nnp_assist: "nnp-assist"; + /// s390x nnp-assist facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] transactional_execution: "transactional-execution"; + /// s390x transactional-execution facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector: "vector"; + /// s390x vector facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_1: "vector-enhancements-1"; + /// s390x vector-enhancements-1 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_2: "vector-enhancements-2"; + /// s390x vector-enhancements-2 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_3: "vector-enhancements-3"; + /// s390x vector-enhancements-3 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal: "vector-packed-decimal"; + /// s390x vector-packed-decimal facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement: "vector-packed-decimal-enhancement"; + /// s390x vector-packed-decimal-enhancement facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement_2: "vector-packed-decimal-enhancement-2"; + /// s390x vector-packed-decimal-enhancement-2 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement_3: "vector-packed-decimal-enhancement-3"; + /// s390x vector-packed-decimal-enhancement-3 facility +} diff --git a/library/std_detect/src/detect/arch/x86.rs b/library/std_detect/src/detect/arch/x86.rs new file mode 100644 index 00000000000..28b3e3cfb35 --- /dev/null +++ b/library/std_detect/src/detect/arch/x86.rs @@ -0,0 +1,278 @@ +//! This module implements minimal run-time feature detection for x86. +//! +//! The features are detected using the `detect_features` function below. +//! This function uses the CPUID instruction to read the feature flags from the +//! CPU and encodes them in a `usize` where each bit position represents +//! whether a feature is available (bit is set) or unavailable (bit is cleared). +//! +//! The enum `Feature` is used to map bit positions to feature names, and the +//! the `__crate::detect::check_for!` macro is used to map string literals (e.g., +//! "avx") to these bit positions (e.g., `Feature::avx`). +//! +//! The run-time feature detection is performed by the +//! `__crate::detect::check_for(Feature) -> bool` function. On its first call, +//! this functions queries the CPU for the available features and stores them +//! in a global `AtomicUsize` variable. The query is performed by just checking +//! whether the feature bit in this global variable is set or cleared. + +features! { + @TARGET: x86; + @CFG: any(target_arch = "x86", target_arch = "x86_64"); + @MACRO_NAME: is_x86_feature_detected; + @MACRO_ATTRS: + /// A macro to test at *runtime* whether a CPU feature is available on + /// x86/x86-64 platforms. + /// + /// This macro is provided in the standard library and will detect at runtime + /// whether the specified CPU feature is detected. This does **not** resolve at + /// compile time unless the specified feature is already enabled for the entire + /// crate. Runtime detection currently relies mostly on the `cpuid` instruction. + /// + /// This macro only takes one argument which is a string literal of the feature + /// being tested for. The feature names supported are the lowercase versions of + /// the ones defined by Intel in [their documentation][docs]. + /// + /// ## Supported arguments + /// + /// This macro supports the same names that `#[target_feature]` supports. Unlike + /// `#[target_feature]`, however, this macro does not support names separated + /// with a comma. Instead testing for multiple features must be done through + /// separate macro invocations for now. + /// + /// Supported arguments are: + /// + /// * `"aes"` + /// * `"pclmulqdq"` + /// * `"rdrand"` + /// * `"rdseed"` + /// * `"tsc"` + /// * `"mmx"` + /// * `"sse"` + /// * `"sse2"` + /// * `"sse3"` + /// * `"ssse3"` + /// * `"sse4.1"` + /// * `"sse4.2"` + /// * `"sse4a"` + /// * `"sha"` + /// * `"avx"` + /// * `"avx2"` + /// * `"sha512"` + /// * `"sm3"` + /// * `"sm4"` + /// * `"avx512f"` + /// * `"avx512cd"` + /// * `"avx512er"` + /// * `"avx512pf"` + /// * `"avx512bw"` + /// * `"avx512dq"` + /// * `"avx512vl"` + /// * `"avx512ifma"` + /// * `"avx512vbmi"` + /// * `"avx512vpopcntdq"` + /// * `"avx512vbmi2"` + /// * `"gfni"` + /// * `"vaes"` + /// * `"vpclmulqdq"` + /// * `"avx512vnni"` + /// * `"avx512bitalg"` + /// * `"avx512bf16"` + /// * `"avx512vp2intersect"` + /// * `"avx512fp16"` + /// * `"avxvnni"` + /// * `"avxifma"` + /// * `"avxneconvert"` + /// * `"avxvnniint8"` + /// * `"avxvnniint16"` + /// * `"amx-tile"` + /// * `"amx-int8"` + /// * `"amx-bf16"` + /// * `"amx-fp16"` + /// * `"amx-complex"` + /// * `"amx-avx512"` + /// * `"amx-fp8"` + /// * `"amx-movrs"` + /// * `"amx-tf32"` + /// * `"amx-transpose"` + /// * `"f16c"` + /// * `"fma"` + /// * `"bmi1"` + /// * `"bmi2"` + /// * `"abm"` + /// * `"lzcnt"` + /// * `"tbm"` + /// * `"popcnt"` + /// * `"fxsr"` + /// * `"xsave"` + /// * `"xsaveopt"` + /// * `"xsaves"` + /// * `"xsavec"` + /// * `"cmpxchg16b"` + /// * `"kl"` + /// * `"widekl"` + /// * `"adx"` + /// * `"rtm"` + /// * `"movbe"` + /// * `"ermsb"` + /// * `"movrs"` + /// * `"xop"` + /// + /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide + #[stable(feature = "simd_x86", since = "1.27.0")] + @BIND_FEATURE_NAME: "abm"; "lzcnt"; // abm is a synonym for lzcnt + @BIND_FEATURE_NAME: "avx512gfni"; "gfni"; #[deprecated(since = "1.67.0", note = "the `avx512gfni` feature has been renamed to `gfni`")]; + @BIND_FEATURE_NAME: "avx512vaes"; "vaes"; #[deprecated(since = "1.67.0", note = "the `avx512vaes` feature has been renamed to `vaes`")]; + @BIND_FEATURE_NAME: "avx512vpclmulqdq"; "vpclmulqdq"; #[deprecated(since = "1.67.0", note = "the `avx512vpclmulqdq` feature has been renamed to `vpclmulqdq`")]; + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] aes: "aes"; + /// AES (Advanced Encryption Standard New Instructions AES-NI) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] pclmulqdq: "pclmulqdq"; + /// CLMUL (Carry-less Multiplication) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rdrand: "rdrand"; + /// RDRAND + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rdseed: "rdseed"; + /// RDSEED + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] tsc: "tsc"; + without cfg check: true; + /// TSC (Time Stamp Counter) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] mmx: "mmx"; + without cfg check: true; + /// MMX (MultiMedia eXtensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse: "sse"; + /// SSE (Streaming SIMD Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse2: "sse2"; + /// SSE2 (Streaming SIMD Extensions 2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse3: "sse3"; + /// SSE3 (Streaming SIMD Extensions 3) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ssse3: "ssse3"; + /// SSSE3 (Supplemental Streaming SIMD Extensions 3) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4_1: "sse4.1"; + /// SSE4.1 (Streaming SIMD Extensions 4.1) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4_2: "sse4.2"; + /// SSE4.2 (Streaming SIMD Extensions 4.2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4a: "sse4a"; + /// SSE4a (Streaming SIMD Extensions 4a) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sha: "sha"; + /// SHA + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx: "avx"; + /// AVX (Advanced Vector Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx2: "avx2"; + /// AVX2 (Advanced Vector Extensions 2) + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "1.89.0")] sha512: "sha512"; + /// SHA512 + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "1.89.0")] sm3: "sm3"; + /// SM3 + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "1.89.0")] sm4: "sm4"; + /// SM4 + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512f: "avx512f" ; + /// AVX-512 F (Foundation) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512cd: "avx512cd" ; + /// AVX-512 CD (Conflict Detection Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512er: "avx512er"; + without cfg check: true; + /// AVX-512 ER (Expo nential and Reciprocal Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512pf: "avx512pf"; + without cfg check: true; + /// AVX-512 PF (Prefetch Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bw: "avx512bw"; + /// AVX-512 BW (Byte and Word Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512dq: "avx512dq"; + /// AVX-512 DQ (Doubleword and Quadword) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vl: "avx512vl"; + /// AVX-512 VL (Vector Length Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512ifma: "avx512ifma"; + /// AVX-512 IFMA (Integer Fused Multiply Add) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi: "avx512vbmi"; + /// AVX-512 VBMI (Vector Byte Manipulation Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpopcntdq: "avx512vpopcntdq"; + /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and Quadword) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi2: "avx512vbmi2"; + /// AVX-512 VBMI2 (Additional byte, word, dword and qword capabilities) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] gfni: "gfni"; + /// AVX-512 GFNI (Galois Field New Instruction) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] vaes: "vaes"; + /// AVX-512 VAES (Vector AES instruction) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] vpclmulqdq: "vpclmulqdq"; + /// AVX-512 VPCLMULQDQ (Vector PCLMULQDQ instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vnni: "avx512vnni"; + /// AVX-512 VNNI (Vector Neural Network Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bitalg: "avx512bitalg"; + /// AVX-512 BITALG (Support for VPOPCNT\[B,W\] and VPSHUFBITQMB) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bf16: "avx512bf16"; + /// AVX-512 BF16 (BFLOAT16 instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vp2intersect: "avx512vp2intersect"; + /// AVX-512 P2INTERSECT + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512fp16: "avx512fp16"; + /// AVX-512 FP16 (FLOAT16 instructions) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxifma: "avxifma"; + /// AVX-IFMA (Integer Fused Multiply Add) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxneconvert: "avxneconvert"; + /// AVX-NE-CONVERT (Exceptionless Convert) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxvnni: "avxvnni"; + /// AVX-VNNI (Vector Neural Network Instructions) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxvnniint16: "avxvnniint16"; + /// AVX-VNNI_INT8 (VNNI with 16-bit Integers) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxvnniint8: "avxvnniint8"; + /// AVX-VNNI_INT16 (VNNI with 8-bit integers) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tile: "amx-tile"; + /// AMX (Advanced Matrix Extensions) - Tile load/store + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_int8: "amx-int8"; + /// AMX-INT8 (Operations on 8-bit integers) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_bf16: "amx-bf16"; + /// AMX-BF16 (BFloat16 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp16: "amx-fp16"; + /// AMX-FP16 (Float16 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_complex: "amx-complex"; + /// AMX-COMPLEX (Complex number Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_avx512: "amx-avx512"; + /// AMX-AVX512 (AVX512 operations extended to matrices) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp8: "amx-fp8"; + /// AMX-FP8 (Float8 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_movrs: "amx-movrs"; + /// AMX-MOVRS (Matrix MOVERS operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tf32: "amx-tf32"; + /// AMX-TF32 (TensorFloat32 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose"; + /// AMX-TRANSPOSE (Matrix Transpose Operations) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c"; + /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma"; + /// FMA (Fused Multiply Add) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] bmi1: "bmi1" ; + /// BMI1 (Bit Manipulation Instructions 1) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] bmi2: "bmi2" ; + /// BMI2 (Bit Manipulation Instructions 2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] lzcnt: "lzcnt"; + /// ABM (Advanced Bit Manipulation) / LZCNT (Leading Zero Count) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] tbm: "tbm"; + /// TBM (Trailing Bit Manipulation) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] popcnt: "popcnt"; + /// POPCNT (Population Count) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fxsr: "fxsr"; + /// FXSR (Floating-point context fast save and restore) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsave: "xsave"; + /// XSAVE (Save Processor Extended States) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsaveopt: "xsaveopt"; + /// XSAVEOPT (Save Processor Extended States Optimized) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsaves: "xsaves"; + /// XSAVES (Save Processor Extended States Supervisor) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsavec: "xsavec"; + /// XSAVEC (Save Processor Extended States Compacted) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] cmpxchg16b: "cmpxchg16b"; + /// CMPXCH16B (16-byte compare-and-swap instruction) + @FEATURE: #[stable(feature = "keylocker_x86", since = "1.89.0")] kl: "kl"; + /// Intel Key Locker + @FEATURE: #[stable(feature = "keylocker_x86", since = "1.89.0")] widekl: "widekl"; + /// Intel Key Locker Wide + @FEATURE: #[stable(feature = "simd_x86_adx", since = "1.33.0")] adx: "adx"; + /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rtm: "rtm"; + /// RTM, Intel (Restricted Transactional Memory) + @FEATURE: #[stable(feature = "movbe_target_feature", since = "1.67.0")] movbe: "movbe"; + /// MOVBE (Move Data After Swapping Bytes) + @FEATURE: #[unstable(feature = "movrs_target_feature", issue = "137976")] movrs: "movrs"; + /// MOVRS (Move data with the read-shared hint) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ermsb: "ermsb"; + /// ERMSB, Enhanced REP MOVSB and STOSB + @FEATURE: #[unstable(feature = "xop_target_feature", issue = "127208")] xop: "xop"; + /// XOP: eXtended Operations (AMD) +} diff --git a/library/std_detect/src/detect/bit.rs b/library/std_detect/src/detect/bit.rs new file mode 100644 index 00000000000..6f06c5523e4 --- /dev/null +++ b/library/std_detect/src/detect/bit.rs @@ -0,0 +1,9 @@ +//! Bit manipulation utilities. + +/// Tests the `bit` of `x`. +#[allow(dead_code)] +#[inline] +pub(crate) fn test(x: usize, bit: u32) -> bool { + debug_assert!(bit < usize::BITS, "bit index out-of-bounds"); + x & (1 << bit) != 0 +} diff --git a/library/std_detect/src/detect/cache.rs b/library/std_detect/src/detect/cache.rs new file mode 100644 index 00000000000..1a42e091463 --- /dev/null +++ b/library/std_detect/src/detect/cache.rs @@ -0,0 +1,201 @@ +//! Caches run-time feature detection so that it only needs to be computed +//! once. + +#![allow(dead_code)] // not used on all platforms + +use core::sync::atomic::{AtomicUsize, Ordering}; + +/// Sets the `bit` of `x`. +#[inline] +const fn set_bit(x: u128, bit: u32) -> u128 { + x | 1 << bit +} + +/// Tests the `bit` of `x`. +#[inline] +const fn test_bit(x: u128, bit: u32) -> bool { + x & (1 << bit) != 0 +} + +/// Unset the `bit of `x`. +#[inline] +const fn unset_bit(x: u128, bit: u32) -> u128 { + x & !(1 << bit) +} + +/// Maximum number of features that can be cached. +const CACHE_CAPACITY: u32 = 93; + +/// This type is used to initialize the cache +// The derived `Default` implementation will initialize the field to zero, +// which is what we want. +#[derive(Copy, Clone, Default, PartialEq, Eq)] +pub(crate) struct Initializer(u128); + +// NOTE: the `debug_assert!` would catch that we do not add more Features than +// the one fitting our cache. +impl Initializer { + /// Tests the `bit` of the cache. + #[inline] + pub(crate) fn test(self, bit: u32) -> bool { + debug_assert!(bit < CACHE_CAPACITY, "too many features, time to increase the cache size!"); + test_bit(self.0, bit) + } + + /// Sets the `bit` of the cache. + #[inline] + pub(crate) fn set(&mut self, bit: u32) { + debug_assert!(bit < CACHE_CAPACITY, "too many features, time to increase the cache size!"); + let v = self.0; + self.0 = set_bit(v, bit); + } + + /// Unsets the `bit` of the cache. + #[inline] + pub(crate) fn unset(&mut self, bit: u32) { + debug_assert!(bit < CACHE_CAPACITY, "too many features, time to increase the cache size!"); + let v = self.0; + self.0 = unset_bit(v, bit); + } +} + +/// This global variable is a cache of the features supported by the CPU. +// Note: the third slot is only used in x86 +// Another Slot can be added if needed without any change to `Initializer` +static CACHE: [Cache; 3] = [Cache::uninitialized(), Cache::uninitialized(), Cache::uninitialized()]; + +/// Feature cache with capacity for `size_of::<usize>() * 8 - 1` features. +/// +/// Note: 0 is used to represent an uninitialized cache, and (at least) the most +/// significant bit is set on any cache which has been initialized. +/// +/// Note: we use `Relaxed` atomic operations, because we are only interested in +/// the effects of operations on a single memory location. That is, we only need +/// "modification order", and not the full-blown "happens before". +struct Cache(AtomicUsize); + +impl Cache { + const CAPACITY: u32 = (core::mem::size_of::<usize>() * 8 - 1) as u32; + const MASK: usize = (1 << Cache::CAPACITY) - 1; + const INITIALIZED_BIT: usize = 1usize << Cache::CAPACITY; + + /// Creates an uninitialized cache. + #[allow(clippy::declare_interior_mutable_const)] + const fn uninitialized() -> Self { + Cache(AtomicUsize::new(0)) + } + + /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized. + #[inline] + pub(crate) fn test(&self, bit: u32) -> Option<bool> { + let cached = self.0.load(Ordering::Relaxed); + if cached == 0 { None } else { Some(test_bit(cached as u128, bit)) } + } + + /// Initializes the cache. + #[inline] + fn initialize(&self, value: usize) -> usize { + debug_assert_eq!((value & !Cache::MASK), 0); + self.0.store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed); + value + } +} + +cfg_if::cfg_if! { + if #[cfg(feature = "std_detect_env_override")] { + #[inline] + fn disable_features(disable: &[u8], value: &mut Initializer) { + if let Ok(disable) = core::str::from_utf8(disable) { + for v in disable.split(" ") { + let _ = super::Feature::from_str(v).map(|v| value.unset(v as u32)); + } + } + } + + #[inline] + fn initialize(mut value: Initializer) -> Initializer { + use core::ffi::CStr; + const RUST_STD_DETECT_UNSTABLE: &CStr = c"RUST_STD_DETECT_UNSTABLE"; + cfg_if::cfg_if! { + if #[cfg(windows)] { + use alloc::vec; + #[link(name = "kernel32")] + unsafe extern "system" { + fn GetEnvironmentVariableA(name: *const u8, buffer: *mut u8, size: u32) -> u32; + } + let len = unsafe { GetEnvironmentVariableA(RUST_STD_DETECT_UNSTABLE.as_ptr().cast::<u8>(), core::ptr::null_mut(), 0) }; + if len > 0 { + // +1 to include the null terminator. + let mut env = vec![0; len as usize + 1]; + let len = unsafe { GetEnvironmentVariableA(RUST_STD_DETECT_UNSTABLE.as_ptr().cast::<u8>(), env.as_mut_ptr(), len + 1) }; + if len > 0 { + disable_features(&env[..len as usize], &mut value); + } + } + } else { + let env = unsafe { + libc::getenv(RUST_STD_DETECT_UNSTABLE.as_ptr()) + }; + if !env.is_null() { + let len = unsafe { libc::strlen(env) }; + let env = unsafe { core::slice::from_raw_parts(env as *const u8, len) }; + disable_features(env, &mut value); + } + } + } + do_initialize(value); + value + } + } else { + #[inline] + fn initialize(value: Initializer) -> Initializer { + do_initialize(value); + value + } + } +} + +#[inline] +fn do_initialize(value: Initializer) { + CACHE[0].initialize((value.0) as usize & Cache::MASK); + CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK); + CACHE[2].initialize((value.0 >> (2 * Cache::CAPACITY)) as usize & Cache::MASK); +} + +// We only have to detect features once, and it's fairly costly, so hint to LLVM +// that it should assume that cache hits are more common than misses (which is +// the point of caching). It's possibly unfortunate that this function needs to +// reach across modules like this to call `os::detect_features`, but it produces +// the best code out of several attempted variants. +// +// The `Initializer` that the cache was initialized with is returned, so that +// the caller can call `test()` on it without having to load the value from the +// cache again. +#[cold] +fn detect_and_initialize() -> Initializer { + initialize(super::os::detect_features()) +} + +/// Tests the `bit` of the storage. If the storage has not been initialized, +/// initializes it with the result of `os::detect_features()`. +/// +/// On its first invocation, it detects the CPU features and caches them in the +/// `CACHE` global variable as an `AtomicU64`. +/// +/// It uses the `Feature` variant to index into this variable as a bitset. If +/// the bit is set, the feature is enabled, and otherwise it is disabled. +/// +/// If the feature `std_detect_env_override` is enabled looks for the env +/// variable `RUST_STD_DETECT_UNSTABLE` and uses its content to disable +/// Features that would had been otherwise detected. +#[inline] +pub(crate) fn test(bit: u32) -> bool { + let (relative_bit, idx) = if bit < Cache::CAPACITY { + (bit, 0) + } else if bit < 2 * Cache::CAPACITY { + (bit - Cache::CAPACITY, 1) + } else { + (bit - 2 * Cache::CAPACITY, 2) + }; + CACHE[idx].test(relative_bit).unwrap_or_else(|| detect_and_initialize().test(bit)) +} diff --git a/library/std_detect/src/detect/macros.rs b/library/std_detect/src/detect/macros.rs new file mode 100644 index 00000000000..17140e15653 --- /dev/null +++ b/library/std_detect/src/detect/macros.rs @@ -0,0 +1,203 @@ +#[macro_export] +#[allow_internal_unstable(stdarch_internal)] +#[unstable(feature = "stdarch_internal", issue = "none")] +macro_rules! detect_feature { + ($feature:tt, $feature_lit:tt) => { + $crate::detect_feature!($feature, $feature_lit : $feature_lit) + }; + ($feature:tt, $feature_lit:tt : $($target_feature_lit:tt),*) => { + $(cfg!(target_feature = $target_feature_lit) ||)* + $crate::detect::__is_feature_detected::$feature() + }; + ($feature:tt, $feature_lit:tt, without cfg check: true) => { + $crate::detect::__is_feature_detected::$feature() + }; +} + +#[allow(unused_macros, reason = "it's used in the features! macro below")] +macro_rules! check_cfg_feature { + ($feature:tt, $feature_lit:tt) => { + check_cfg_feature!($feature, $feature_lit : $feature_lit) + }; + ($feature:tt, $feature_lit:tt : $($target_feature_lit:tt),*) => { + $(cfg!(target_feature = $target_feature_lit);)* + }; + ($feature:tt, $feature_lit:tt, without cfg check: $feature_cfg_check:literal) => { + #[allow(unexpected_cfgs, reason = $feature_lit)] + { cfg!(target_feature = $feature_lit) } + }; +} + +#[allow(unused)] +macro_rules! features { + ( + @TARGET: $target:ident; + @CFG: $cfg:meta; + @MACRO_NAME: $macro_name:ident; + @MACRO_ATTRS: $(#[$macro_attrs:meta])* + $(@BIND_FEATURE_NAME: $bind_feature:tt; $feature_impl:tt; $(#[$deprecate_attr:meta];)?)* + $(@NO_RUNTIME_DETECTION: $nort_feature:tt; )* + $(@FEATURE: #[$stability_attr:meta] $feature:ident: $feature_lit:tt; + $(without cfg check: $feature_cfg_check:tt;)? + $(implied by target_features: [$($target_feature_lit:tt),*];)? + $(#[$feature_comment:meta])*)* + ) => { + #[macro_export] + $(#[$macro_attrs])* + #[allow_internal_unstable(stdarch_internal)] + #[cfg($cfg)] + #[doc(cfg($cfg))] + macro_rules! $macro_name { + $( + ($feature_lit) => { + $crate::detect_feature!($feature, $feature_lit $(, without cfg check: $feature_cfg_check)? $(: $($target_feature_lit),*)?) + }; + )* + $( + ($bind_feature) => { + { + $( + #[$deprecate_attr] macro_rules! deprecated_feature { {} => {}; } + deprecated_feature! {}; + )? + $crate::$macro_name!($feature_impl) + } + }; + )* + $( + ($nort_feature) => { + compile_error!( + concat!( + stringify!($nort_feature), + " feature cannot be detected at run-time" + ) + ) + }; + )* + ($t:tt,) => { + $crate::$macro_name!($t); + }; + ($t:tt) => { + compile_error!( + concat!( + concat!("unknown ", stringify!($target)), + concat!(" target feature: ", $t) + ) + ) + }; + } + + $(#[$macro_attrs])* + #[macro_export] + #[cfg(not($cfg))] + #[doc(cfg($cfg))] + macro_rules! $macro_name { + $( + ($feature_lit) => { + compile_error!( + concat!( + r#"This macro cannot be used on the current target. + You can prevent it from being used in other architectures by + guarding it behind a cfg("#, + stringify!($cfg), + ")." + ) + ) + }; + )* + $( + ($bind_feature) => { $crate::$macro_name!($feature_impl) }; + )* + $( + ($nort_feature) => { + compile_error!( + concat!( + stringify!($nort_feature), + " feature cannot be detected at run-time" + ) + ) + }; + )* + ($t:tt,) => { + $crate::$macro_name!($t); + }; + ($t:tt) => { + compile_error!( + concat!( + concat!("unknown ", stringify!($target)), + concat!(" target feature: ", $t) + ) + ) + }; + } + + #[deny(unexpected_cfgs)] + #[deny(unfulfilled_lint_expectations)] + const _: () = { + $( + check_cfg_feature!($feature, $feature_lit $(, without cfg check: $feature_cfg_check)? $(: $($target_feature_lit),*)?); + )* + }; + + /// Each variant denotes a position in a bitset for a particular feature. + /// + /// PLEASE: do not use this, it is an implementation detail subject + /// to change. + #[doc(hidden)] + #[allow(non_camel_case_types)] + #[derive(Copy, Clone)] + #[repr(u8)] + #[unstable(feature = "stdarch_internal", issue = "none")] + #[cfg($cfg)] + pub(crate) enum Feature { + $( + $(#[$feature_comment])* + $feature, + )* + + // Do not add variants after last: + _last + } + + #[cfg($cfg)] + impl Feature { + pub(crate) fn to_str(self) -> &'static str { + match self { + $(Feature::$feature => $feature_lit,)* + Feature::_last => unreachable!(), + } + } + + #[cfg(feature = "std_detect_env_override")] + pub(crate) fn from_str(s: &str) -> Result<Feature, ()> { + match s { + $($feature_lit => Ok(Feature::$feature),)* + _ => Err(()) + } + } + } + + /// Each function performs run-time feature detection for a single + /// feature. This allow us to use stability attributes on a per feature + /// basis. + /// + /// PLEASE: do not use this, it is an implementation detail subject + /// to change. + #[doc(hidden)] + #[cfg($cfg)] + #[unstable(feature = "stdarch_internal", issue = "none")] + pub mod __is_feature_detected { + $( + + /// PLEASE: do not use this, it is an implementation detail + /// subject to change. + #[inline] + #[doc(hidden)] + #[$stability_attr] + pub fn $feature() -> bool { + $crate::detect::check_for($crate::detect::Feature::$feature) + } + )* + } + }; +} diff --git a/library/std_detect/src/detect/mod.rs b/library/std_detect/src/detect/mod.rs new file mode 100644 index 00000000000..f936a5a1345 --- /dev/null +++ b/library/std_detect/src/detect/mod.rs @@ -0,0 +1,120 @@ +//! This module implements run-time feature detection. +//! +//! The `is_{arch}_feature_detected!("feature-name")` macros take the name of a +//! feature as a string-literal, and return a boolean indicating whether the +//! feature is enabled at run-time or not. +//! +//! These macros do two things: +//! * map the string-literal into an integer stored as a `Feature` enum, +//! * call a `os::check_for(x: Feature)` function that returns `true` if the +//! feature is enabled. +//! +//! The `Feature` enums are also implemented in the `arch/{target_arch}.rs` +//! modules. +//! +//! The `check_for` functions are, in general, Operating System dependent. Most +//! architectures do not allow user-space programs to query the feature bits +//! due to security concerns (x86 is the big exception). These functions are +//! implemented in the `os/{target_os}.rs` modules. + +use cfg_if::cfg_if; + +#[macro_use] +mod macros; + +mod arch; + +// This module needs to be public because the `is_{arch}_feature_detected!` +// macros expand calls to items within it in user crates. +#[doc(hidden)] +#[unstable(feature = "stdarch_internal", issue = "none")] +pub use self::arch::__is_feature_detected; +pub(crate) use self::arch::Feature; + +mod bit; +mod cache; + +cfg_if! { + if #[cfg(miri)] { + // When running under miri all target-features that are not enabled at + // compile-time are reported as disabled at run-time. + // + // For features for which `cfg(target_feature)` returns true, + // this run-time detection logic is never called. + #[path = "os/other.rs"] + mod os; + } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + // On x86/x86_64 no OS specific functionality is required. + #[path = "os/x86.rs"] + mod os; + } else if #[cfg(all(any(target_os = "linux", target_os = "android"), feature = "libc"))] { + #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] + #[path = "os/riscv.rs"] + mod riscv; + #[path = "os/linux/mod.rs"] + mod os; + } else if #[cfg(all(target_os = "freebsd", feature = "libc"))] { + #[cfg(target_arch = "aarch64")] + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/freebsd/mod.rs"] + mod os; + } else if #[cfg(all(target_os = "openbsd", target_arch = "aarch64", feature = "libc"))] { + #[allow(dead_code)] // we don't use code that calls the mrs instruction. + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/openbsd/aarch64.rs"] + mod os; + } else if #[cfg(all(target_os = "windows", any(target_arch = "aarch64", target_arch = "arm64ec")))] { + #[path = "os/windows/aarch64.rs"] + mod os; + } else if #[cfg(all(target_vendor = "apple", target_arch = "aarch64", feature = "libc"))] { + #[path = "os/darwin/aarch64.rs"] + mod os; + } else { + #[path = "os/other.rs"] + mod os; + } +} + +/// Performs run-time feature detection. +#[inline] +#[allow(dead_code)] +fn check_for(x: Feature) -> bool { + cache::test(x as u32) +} + +/// Returns an `Iterator<Item=(&'static str, bool)>` where +/// `Item.0` is the feature name, and `Item.1` is a `bool` which +/// is `true` if the feature is supported by the host and `false` otherwise. +#[unstable(feature = "stdarch_internal", issue = "none")] +pub fn features() -> impl Iterator<Item = (&'static str, bool)> { + cfg_if! { + if #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "mips", + target_arch = "mips64", + target_arch = "loongarch32", + target_arch = "loongarch64", + target_arch = "s390x", + ))] { + (0_u8..Feature::_last as u8).map(|discriminant: u8| { + #[allow(bindings_with_variant_name)] // RISC-V has Feature::f + let f: Feature = unsafe { core::mem::transmute(discriminant) }; + let name: &'static str = f.to_str(); + let enabled: bool = check_for(f); + (name, enabled) + }) + } else { + None.into_iter() + } + } +} diff --git a/library/std_detect/src/detect/os/aarch64.rs b/library/std_detect/src/detect/os/aarch64.rs new file mode 100644 index 00000000000..c2c754ccf8d --- /dev/null +++ b/library/std_detect/src/detect/os/aarch64.rs @@ -0,0 +1,128 @@ +//! Run-time feature detection for Aarch64 on any OS that emulates the mrs instruction. +//! +//! On FreeBSD >= 12.0, Linux >= 4.11 and other operating systems, it is possible to use +//! privileged system registers from userspace to check CPU feature support. +//! +//! AArch64 system registers ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1, ID_AA64ISAR1_EL1 +//! have bits dedicated to features like AdvSIMD, CRC32, AES, atomics (LSE), etc. +//! Each part of the register indicates the level of support for a certain feature, e.g. +//! when ID_AA64ISAR0_EL1\[7:4\] is >= 1, AES is supported; when it's >= 2, PMULL is supported. +//! +//! For proper support of [SoCs where different cores have different capabilities](https://medium.com/@jadr2ddude/a-big-little-problem-a-tale-of-big-little-gone-wrong-e7778ce744bb), +//! the OS has to always report only the features supported by all cores, like [FreeBSD does](https://reviews.freebsd.org/D17137#393947). +//! +//! References: +//! +//! - [Zircon implementation](https://fuchsia.googlesource.com/zircon/+/master/kernel/arch/arm64/feature.cpp) +//! - [Linux documentation](https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt) +//! - [ARM documentation](https://developer.arm.com/documentation/ddi0601/2022-12/AArch64-Registers?lang=en) + +use core::arch::asm; + +use crate::detect::{Feature, cache}; + +/// Try to read the features from the system registers. +/// +/// This will cause SIGILL if the current OS is not trapping the mrs instruction. +pub(crate) fn detect_features() -> cache::Initializer { + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + let aa64isar0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR0_EL1", + out(reg) aa64isar0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + let aa64isar1: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR1_EL1", + out(reg) aa64isar1, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + let aa64mmfr2: u64; + unsafe { + asm!( + "mrs {}, ID_AA64MMFR2_EL1", + out(reg) aa64mmfr2, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + let aa64pfr0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64PFR0_EL1", + out(reg) aa64pfr0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, Some(aa64pfr0)) +} + +pub(crate) fn parse_system_registers( + aa64isar0: u64, + aa64isar1: u64, + aa64mmfr2: u64, + aa64pfr0: Option<u64>, +) -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + enable_feature(Feature::pmull, bits_shift(aa64isar0, 7, 4) >= 2); + enable_feature(Feature::tme, bits_shift(aa64isar0, 27, 24) == 1); + enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 2); + enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + if let Some(aa64pfr0) = aa64pfr0 { + let fp = bits_shift(aa64pfr0, 19, 16) < 0xF; + let fphp = bits_shift(aa64pfr0, 19, 16) >= 1; + let asimd = bits_shift(aa64pfr0, 23, 20) < 0xF; + let asimdhp = bits_shift(aa64pfr0, 23, 20) >= 1; + enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fphp); + // SIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + enable_feature(Feature::asimd, fp && asimd && (!fphp | asimdhp)); + // SIMD extensions require SIMD support: + enable_feature(Feature::aes, asimd && bits_shift(aa64isar0, 7, 4) >= 2); + let sha1 = bits_shift(aa64isar0, 11, 8) >= 1; + let sha2 = bits_shift(aa64isar0, 15, 12) >= 1; + enable_feature(Feature::sha2, asimd && sha1 && sha2); + enable_feature(Feature::rdm, asimd && bits_shift(aa64isar0, 31, 28) >= 1); + enable_feature(Feature::dotprod, asimd && bits_shift(aa64isar0, 47, 44) >= 1); + enable_feature(Feature::sve, asimd && bits_shift(aa64pfr0, 35, 32) >= 1); + } + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + // Check for either APA or API field + enable_feature(Feature::paca, bits_shift(aa64isar1, 11, 4) >= 1); + enable_feature(Feature::rcpc, bits_shift(aa64isar1, 23, 20) >= 1); + // Check for either GPA or GPI field + enable_feature(Feature::pacg, bits_shift(aa64isar1, 31, 24) >= 1); + + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + enable_feature(Feature::lse2, bits_shift(aa64mmfr2, 35, 32) >= 1); + + value +} + +#[inline] +fn bits_shift(x: u64, high: usize, low: usize) -> u64 { + (x >> low) & ((1 << (high - low + 1)) - 1) +} diff --git a/library/std_detect/src/detect/os/darwin/aarch64.rs b/library/std_detect/src/detect/os/darwin/aarch64.rs new file mode 100644 index 00000000000..f5409361d93 --- /dev/null +++ b/library/std_detect/src/detect/os/darwin/aarch64.rs @@ -0,0 +1,150 @@ +//! Run-time feature detection for aarch64 on Darwin (macOS/iOS/tvOS/watchOS/visionOS). +//! +//! <https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics> + +use core::ffi::CStr; + +use crate::detect::{Feature, cache}; + +#[inline] +fn _sysctlbyname(name: &CStr) -> bool { + use libc; + + let mut enabled: i32 = 0; + let mut enabled_len: usize = 4; + let enabled_ptr = &mut enabled as *mut i32 as *mut libc::c_void; + + let ret = unsafe { + libc::sysctlbyname(name.as_ptr(), enabled_ptr, &mut enabled_len, core::ptr::null_mut(), 0) + }; + + match ret { + 0 => enabled != 0, + _ => false, + } +} + +/// Try to read the features using sysctlbyname. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Armv8.0 features not using the standard identifiers + let fp = _sysctlbyname(c"hw.optional.floatingpoint"); + let asimd = _sysctlbyname(c"hw.optional.AdvSIMD"); + let crc = _sysctlbyname(c"hw.optional.armv8_crc32"); + + // Armv8 and Armv9 features using the standard identifiers + let aes = _sysctlbyname(c"hw.optional.arm.FEAT_AES"); + let bf16 = _sysctlbyname(c"hw.optional.arm.FEAT_BF16"); + let bti = _sysctlbyname(c"hw.optional.arm.FEAT_BTI"); + let cssc = _sysctlbyname(c"hw.optional.arm.FEAT_CSSC"); + let dit = _sysctlbyname(c"hw.optional.arm.FEAT_DIT"); + let dpb = _sysctlbyname(c"hw.optional.arm.FEAT_DPB"); + let dpb2 = _sysctlbyname(c"hw.optional.arm.FEAT_DPB2"); + let dotprod = _sysctlbyname(c"hw.optional.arm.FEAT_DotProd"); + let ecv = _sysctlbyname(c"hw.optional.arm.FEAT_ECV"); + let fcma = _sysctlbyname(c"hw.optional.arm.FEAT_FCMA"); + let fhm = _sysctlbyname(c"hw.optional.arm.FEAT_FHM"); + let fp16 = _sysctlbyname(c"hw.optional.arm.FEAT_FP16"); + let frintts = _sysctlbyname(c"hw.optional.arm.FEAT_FRINTTS"); + let flagm = _sysctlbyname(c"hw.optional.arm.FEAT_FlagM"); + let flagm2 = _sysctlbyname(c"hw.optional.arm.FEAT_FlagM2"); + let hbc = _sysctlbyname(c"hw.optional.arm.FEAT_HBC"); + let i8mm = _sysctlbyname(c"hw.optional.arm.FEAT_I8MM"); + let jsconv = _sysctlbyname(c"hw.optional.arm.FEAT_JSCVT"); + let rcpc = _sysctlbyname(c"hw.optional.arm.FEAT_LRCPC"); + let rcpc2 = _sysctlbyname(c"hw.optional.arm.FEAT_LRCPC2"); + let lse = _sysctlbyname(c"hw.optional.arm.FEAT_LSE"); + let lse2 = _sysctlbyname(c"hw.optional.arm.FEAT_LSE2"); + let pauth = _sysctlbyname(c"hw.optional.arm.FEAT_PAuth"); + let pmull = _sysctlbyname(c"hw.optional.arm.FEAT_PMULL"); + let rdm = _sysctlbyname(c"hw.optional.arm.FEAT_RDM"); + let sb = _sysctlbyname(c"hw.optional.arm.FEAT_SB"); + let sha1 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA1"); + let sha256 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA256"); + let sha3 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA3"); + let sha512 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA512"); + let sme = _sysctlbyname(c"hw.optional.arm.FEAT_SME"); + let sme2 = _sysctlbyname(c"hw.optional.arm.FEAT_SME2"); + let sme_f64f64 = _sysctlbyname(c"hw.optional.arm.FEAT_SME_F64F64"); + let sme_i16i64 = _sysctlbyname(c"hw.optional.arm.FEAT_SME_I16I64"); + let ssbs = _sysctlbyname(c"hw.optional.arm.FEAT_SSBS"); + let wfxt = _sysctlbyname(c"hw.optional.arm.FEAT_WFxT"); + + // The following features are not exposed by `is_aarch64_feature_detected`, + // but *are* reported by `sysctl`. They are here as documentation that they + // exist, and may potentially be exposed later. + /* + let afp = _sysctlbyname(c"hw.optional.arm.FEAT_AFP"); + let csv2 = _sysctlbyname(c"hw.optional.arm.FEAT_CSV2"); + let csv3 = _sysctlbyname(c"hw.optional.arm.FEAT_CSV3"); + let ebf16 = _sysctlbyname(c"hw.optional.arm.FEAT_EBF16"); + let fpac = _sysctlbyname(c"hw.optional.arm.FEAT_FPAC"); + let fpaccombine = _sysctlbyname(c"hw.optional.arm.FEAT_FPACCOMBINE"); + let pacimp = _sysctlbyname(c"hw.optional.arm.FEAT_PACIMP"); + let pauth2 = _sysctlbyname(c"hw.optional.arm.FEAT_PAuth2"); + let rpres = _sysctlbyname(c"hw.optional.arm.FEAT_RPRES"); + let specres = _sysctlbyname(c"hw.optional.arm.FEAT_SPECRES"); + let specres2 = _sysctlbyname(c"hw.optional.arm.FEAT_SPECRES2"); + */ + + // The following "features" are reported by `sysctl` but are mandatory parts + // of SME or SME2, and so are not exposed separately by + // `is_aarch64_feature_detected`. They are here to document their + // existence, in case they're needed in the future. + /* + let sme_b16f32 = _sysctlbyname(c"hw.optional.arm.SME_B16F32"); + let sme_bi32i32 = _sysctlbyname(c"hw.optional.arm.SME_BI32I32"); + let sme_f16f32 = _sysctlbyname(c"hw.optional.arm.SME_F16F32"); + let sme_f32f32 = _sysctlbyname(c"hw.optional.arm.SME_F32F32"); + let sme_i16i32 = _sysctlbyname(c"hw.optional.arm.SME_I16I32"); + let sme_i8i32 = _sysctlbyname(c"hw.optional.arm.SME_I8I32"); + */ + + enable_feature(Feature::aes, aes && pmull); + enable_feature(Feature::asimd, asimd); + enable_feature(Feature::bf16, bf16); + enable_feature(Feature::bti, bti); + enable_feature(Feature::crc, crc); + enable_feature(Feature::cssc, cssc); + enable_feature(Feature::dit, dit); + enable_feature(Feature::dotprod, dotprod); + enable_feature(Feature::dpb, dpb); + enable_feature(Feature::dpb2, dpb2); + enable_feature(Feature::ecv, ecv); + enable_feature(Feature::fcma, fcma); + enable_feature(Feature::fhm, fhm); + enable_feature(Feature::flagm, flagm); + enable_feature(Feature::flagm2, flagm2); + enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fp16); + enable_feature(Feature::frintts, frintts); + enable_feature(Feature::hbc, hbc); + enable_feature(Feature::i8mm, i8mm); + enable_feature(Feature::jsconv, jsconv); + enable_feature(Feature::lse, lse); + enable_feature(Feature::lse2, lse2); + enable_feature(Feature::paca, pauth); + enable_feature(Feature::pacg, pauth); + enable_feature(Feature::pmull, aes && pmull); + enable_feature(Feature::rcpc, rcpc); + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rdm, rdm); + enable_feature(Feature::sb, sb); + enable_feature(Feature::sha2, sha1 && sha256 && asimd); + enable_feature(Feature::sha3, sha512 && sha3 && asimd); + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme_f64f64, sme_f64f64); + enable_feature(Feature::sme_i16i64, sme_i16i64); + enable_feature(Feature::ssbs, ssbs); + enable_feature(Feature::wfxt, wfxt); + + value +} diff --git a/library/std_detect/src/detect/os/freebsd/aarch64.rs b/library/std_detect/src/detect/os/freebsd/aarch64.rs new file mode 100644 index 00000000000..ccc48f53605 --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/aarch64.rs @@ -0,0 +1,3 @@ +//! Run-time feature detection for Aarch64 on FreeBSD. + +pub(crate) use super::super::aarch64::detect_features; diff --git a/library/std_detect/src/detect/os/freebsd/arm.rs b/library/std_detect/src/detect/os/freebsd/arm.rs new file mode 100644 index 00000000000..0a15156e1bd --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/arm.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection for ARM on FreeBSD + +use super::auxvec; +use crate::detect::{Feature, cache}; + +// Defined in machine/elf.h. +// https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/arm/include/elf.h +const HWCAP_NEON: usize = 0x00001000; +const HWCAP2_AES: usize = 0x00000001; +const HWCAP2_PMULL: usize = 0x00000002; +const HWCAP2_SHA1: usize = 0x00000004; +const HWCAP2_SHA2: usize = 0x00000008; +const HWCAP2_CRC32: usize = 0x00000010; + +/// Try to read the features from the auxiliary vector +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, auxv.hwcap & HWCAP_NEON != 0); + enable_feature(&mut value, Feature::pmull, auxv.hwcap2 & HWCAP2_PMULL != 0); + enable_feature(&mut value, Feature::crc, auxv.hwcap2 & HWCAP2_CRC32 != 0); + enable_feature(&mut value, Feature::aes, auxv.hwcap2 & HWCAP2_AES != 0); + // SHA2 requires SHA1 & SHA2 features + let sha1 = auxv.hwcap2 & HWCAP2_SHA1 != 0; + let sha2 = auxv.hwcap2 & HWCAP2_SHA2 != 0; + enable_feature(&mut value, Feature::sha2, sha1 && sha2); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/freebsd/auxvec.rs b/library/std_detect/src/detect/os/freebsd/auxvec.rs new file mode 100644 index 00000000000..2a7b87c05d1 --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/auxvec.rs @@ -0,0 +1,63 @@ +//! Parses ELF auxiliary vectors. +#![cfg_attr( + any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "riscv64" + ), + allow(dead_code) +)] + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For FreeBSD, they are +/// defined in [sys/elf_common.h][elf_common_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// [elf_common.h]: https://svnweb.freebsd.org/base/release/12.0.0/sys/sys/elf_common.h?revision=341707 +pub(crate) fn auxv() -> Result<AuxVec, ()> { + let hwcap = archauxv(libc::AT_HWCAP); + let hwcap2 = archauxv(libc::AT_HWCAP2); + // Zero could indicate that no features were detected, but it's also used to + // indicate an error. In particular, on many platforms AT_HWCAP2 will be + // legitimately zero, since it contains the most recent feature flags. + if hwcap != 0 || hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + Err(()) +} + +/// Tries to read the `key` from the auxiliary vector. +fn archauxv(key: libc::c_int) -> usize { + const OUT_LEN: libc::c_int = core::mem::size_of::<libc::c_ulong>() as libc::c_int; + let mut out: libc::c_ulong = 0; + unsafe { + // elf_aux_info is available on FreeBSD 12.0+ and 11.4+: + // https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 + // https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h + // FreeBSD 11 support in std has been removed in Rust 1.75 (https://github.com/rust-lang/rust/pull/114521), + // so we can safely use this function. + let res = + libc::elf_aux_info(key, &mut out as *mut libc::c_ulong as *mut libc::c_void, OUT_LEN); + // If elf_aux_info fails, `out` will be left at zero (which is the proper default value). + debug_assert!(res == 0 || out == 0); + } + out as usize +} diff --git a/library/std_detect/src/detect/os/freebsd/mod.rs b/library/std_detect/src/detect/os/freebsd/mod.rs new file mode 100644 index 00000000000..ade7fb6269d --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/mod.rs @@ -0,0 +1,22 @@ +//! Run-time feature detection on FreeBSD + +mod auxvec; + +cfg_if::cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub(crate) use self::aarch64::detect_features; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub(crate) use self::arm::detect_features; + } else if #[cfg(target_arch = "powerpc64")] { + mod powerpc; + pub(crate) use self::powerpc::detect_features; + } else { + use crate::detect::cache; + /// Performs run-time feature detection. + pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() + } + } +} diff --git a/library/std_detect/src/detect/os/freebsd/powerpc.rs b/library/std_detect/src/detect/os/freebsd/powerpc.rs new file mode 100644 index 00000000000..d03af68cd08 --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/powerpc.rs @@ -0,0 +1,21 @@ +//! Run-time feature detection for PowerPC on FreeBSD. + +use super::auxvec; +use crate::detect::{Feature, cache}; + +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/aarch64.rs b/library/std_detect/src/detect/os/linux/aarch64.rs new file mode 100644 index 00000000000..87a9d6ebb88 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/aarch64.rs @@ -0,0 +1,398 @@ +//! Run-time feature detection for Aarch64 on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + #[cfg(target_os = "android")] + let is_exynos9810 = { + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // https://reviews.llvm.org/D114523 + let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize]; + let len = unsafe { + libc::__system_property_get(c"ro.arch".as_ptr(), arch.as_mut_ptr() as *mut libc::c_char) + }; + // On Exynos, ro.arch is not available on Android 12+, but it is fine + // because Android 9+ includes the fix. + len > 0 && arch.starts_with(b"exynos9810") + }; + #[cfg(not(target_os = "android"))] + let is_exynos9810 = false; + + if let Ok(auxv) = auxvec::auxv() { + let hwcap: AtHwcap = auxv.into(); + return hwcap.cache(is_exynos9810); + } + cache::Initializer::default() +} + +/// These values are part of the platform-specific [asm/hwcap.h][hwcap] . +/// +/// The names match those used for cpuinfo. +/// +/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +#[derive(Debug, Default, PartialEq)] +struct AtHwcap { + // AT_HWCAP + fp: bool, + asimd: bool, + // evtstrm: No LLVM support. + aes: bool, + pmull: bool, + sha1: bool, + sha2: bool, + crc32: bool, + atomics: bool, + fphp: bool, + asimdhp: bool, + // cpuid: No LLVM support. + asimdrdm: bool, + jscvt: bool, + fcma: bool, + lrcpc: bool, + dcpop: bool, + sha3: bool, + sm3: bool, + sm4: bool, + asimddp: bool, + sha512: bool, + sve: bool, + fhm: bool, + dit: bool, + uscat: bool, + ilrcpc: bool, + flagm: bool, + ssbs: bool, + sb: bool, + paca: bool, + pacg: bool, + + // AT_HWCAP2 + dcpodp: bool, + sve2: bool, + sveaes: bool, + svepmull: bool, + svebitperm: bool, + svesha3: bool, + svesm4: bool, + flagm2: bool, + frint: bool, + // svei8mm: See i8mm feature. + svef32mm: bool, + svef64mm: bool, + // svebf16: See bf16 feature. + i8mm: bool, + bf16: bool, + // dgh: No LLVM support. + rng: bool, + bti: bool, + mte: bool, + ecv: bool, + // afp: bool, + // rpres: bool, + // mte3: bool, + sme: bool, + smei16i64: bool, + smef64f64: bool, + // smei8i32: bool, + // smef16f32: bool, + // smeb16f32: bool, + // smef32f32: bool, + smefa64: bool, + wfxt: bool, + // ebf16: bool, + // sveebf16: bool, + cssc: bool, + // rprfm: bool, + sve2p1: bool, + sme2: bool, + sme2p1: bool, + // smei16i32: bool, + // smebi32i32: bool, + smeb16b16: bool, + smef16f16: bool, + mops: bool, + hbc: bool, + sveb16b16: bool, + lrcpc3: bool, + lse128: bool, + fpmr: bool, + lut: bool, + faminmax: bool, + f8cvt: bool, + f8fma: bool, + f8dp4: bool, + f8dp2: bool, + f8e4m3: bool, + f8e5m2: bool, + smelutv2: bool, + smef8f16: bool, + smef8f32: bool, + smesf8fma: bool, + smesf8dp4: bool, + smesf8dp2: bool, + // pauthlr: bool, +} + +impl From<auxvec::AuxVec> for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. + fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + fp: bit::test(auxv.hwcap, 0), + asimd: bit::test(auxv.hwcap, 1), + // evtstrm: bit::test(auxv.hwcap, 2), + aes: bit::test(auxv.hwcap, 3), + pmull: bit::test(auxv.hwcap, 4), + sha1: bit::test(auxv.hwcap, 5), + sha2: bit::test(auxv.hwcap, 6), + crc32: bit::test(auxv.hwcap, 7), + atomics: bit::test(auxv.hwcap, 8), + fphp: bit::test(auxv.hwcap, 9), + asimdhp: bit::test(auxv.hwcap, 10), + // cpuid: bit::test(auxv.hwcap, 11), + asimdrdm: bit::test(auxv.hwcap, 12), + jscvt: bit::test(auxv.hwcap, 13), + fcma: bit::test(auxv.hwcap, 14), + lrcpc: bit::test(auxv.hwcap, 15), + dcpop: bit::test(auxv.hwcap, 16), + sha3: bit::test(auxv.hwcap, 17), + sm3: bit::test(auxv.hwcap, 18), + sm4: bit::test(auxv.hwcap, 19), + asimddp: bit::test(auxv.hwcap, 20), + sha512: bit::test(auxv.hwcap, 21), + sve: bit::test(auxv.hwcap, 22), + fhm: bit::test(auxv.hwcap, 23), + dit: bit::test(auxv.hwcap, 24), + uscat: bit::test(auxv.hwcap, 25), + ilrcpc: bit::test(auxv.hwcap, 26), + flagm: bit::test(auxv.hwcap, 27), + ssbs: bit::test(auxv.hwcap, 28), + sb: bit::test(auxv.hwcap, 29), + paca: bit::test(auxv.hwcap, 30), + pacg: bit::test(auxv.hwcap, 31), + + // AT_HWCAP2 + dcpodp: bit::test(auxv.hwcap2, 0), + sve2: bit::test(auxv.hwcap2, 1), + sveaes: bit::test(auxv.hwcap2, 2), + svepmull: bit::test(auxv.hwcap2, 3), + svebitperm: bit::test(auxv.hwcap2, 4), + svesha3: bit::test(auxv.hwcap2, 5), + svesm4: bit::test(auxv.hwcap2, 6), + flagm2: bit::test(auxv.hwcap2, 7), + frint: bit::test(auxv.hwcap2, 8), + // svei8mm: bit::test(auxv.hwcap2, 9), + svef32mm: bit::test(auxv.hwcap2, 10), + svef64mm: bit::test(auxv.hwcap2, 11), + // svebf16: bit::test(auxv.hwcap2, 12), + i8mm: bit::test(auxv.hwcap2, 13), + bf16: bit::test(auxv.hwcap2, 14), + // dgh: bit::test(auxv.hwcap2, 15), + rng: bit::test(auxv.hwcap2, 16), + bti: bit::test(auxv.hwcap2, 17), + mte: bit::test(auxv.hwcap2, 18), + ecv: bit::test(auxv.hwcap2, 19), + // afp: bit::test(auxv.hwcap2, 20), + // rpres: bit::test(auxv.hwcap2, 21), + // mte3: bit::test(auxv.hwcap2, 22), + sme: bit::test(auxv.hwcap2, 23), + smei16i64: bit::test(auxv.hwcap2, 24), + smef64f64: bit::test(auxv.hwcap2, 25), + // smei8i32: bit::test(auxv.hwcap2, 26), + // smef16f32: bit::test(auxv.hwcap2, 27), + // smeb16f32: bit::test(auxv.hwcap2, 28), + // smef32f32: bit::test(auxv.hwcap2, 29), + smefa64: bit::test(auxv.hwcap2, 30), + wfxt: bit::test(auxv.hwcap2, 31), + // ebf16: bit::test(auxv.hwcap2, 32), + // sveebf16: bit::test(auxv.hwcap2, 33), + cssc: bit::test(auxv.hwcap2, 34), + // rprfm: bit::test(auxv.hwcap2, 35), + sve2p1: bit::test(auxv.hwcap2, 36), + sme2: bit::test(auxv.hwcap2, 37), + sme2p1: bit::test(auxv.hwcap2, 38), + // smei16i32: bit::test(auxv.hwcap2, 39), + // smebi32i32: bit::test(auxv.hwcap2, 40), + smeb16b16: bit::test(auxv.hwcap2, 41), + smef16f16: bit::test(auxv.hwcap2, 42), + mops: bit::test(auxv.hwcap2, 43), + hbc: bit::test(auxv.hwcap2, 44), + sveb16b16: bit::test(auxv.hwcap2, 45), + lrcpc3: bit::test(auxv.hwcap2, 46), + lse128: bit::test(auxv.hwcap2, 47), + fpmr: bit::test(auxv.hwcap2, 48), + lut: bit::test(auxv.hwcap2, 49), + faminmax: bit::test(auxv.hwcap2, 50), + f8cvt: bit::test(auxv.hwcap2, 51), + f8fma: bit::test(auxv.hwcap2, 52), + f8dp4: bit::test(auxv.hwcap2, 53), + f8dp2: bit::test(auxv.hwcap2, 54), + f8e4m3: bit::test(auxv.hwcap2, 55), + f8e5m2: bit::test(auxv.hwcap2, 56), + smelutv2: bit::test(auxv.hwcap2, 57), + smef8f16: bit::test(auxv.hwcap2, 58), + smef8f32: bit::test(auxv.hwcap2, 59), + smesf8fma: bit::test(auxv.hwcap2, 60), + smesf8dp4: bit::test(auxv.hwcap2, 61), + smesf8dp2: bit::test(auxv.hwcap2, 62), + // pauthlr: bit::test(auxv.hwcap2, ??), + } + } +} + +impl AtHwcap { + /// Initializes the cache from the feature -bits. + /// + /// The feature dependencies here come directly from LLVM's feature definitions: + /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td + fn cache(self, is_exynos9810: bool) -> cache::Initializer { + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // So, only check features that are known to be available on exynos-m3: + // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature + // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342. + if is_exynos9810 { + enable_feature(Feature::fp, self.fp); + enable_feature(Feature::crc, self.crc32); + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // Cryptographic extensions require ASIMD + // AES also covers FEAT_PMULL + enable_feature(Feature::aes, self.aes && self.pmull && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + return value; + } + + enable_feature(Feature::fp, self.fp); + // Half-float support requires float support + enable_feature(Feature::fp16, self.fp && self.fphp); + // FHM (fp16fml in LLVM) requires half float support + enable_feature(Feature::fhm, self.fphp && self.fhm); + enable_feature(Feature::pmull, self.pmull); + enable_feature(Feature::crc, self.crc32); + enable_feature(Feature::lse, self.atomics); + enable_feature(Feature::lse2, self.uscat); + enable_feature(Feature::lse128, self.lse128 && self.atomics); + enable_feature(Feature::rcpc, self.lrcpc); + // RCPC2 (rcpc-immo in LLVM) requires RCPC support + let rcpc2 = self.ilrcpc && self.lrcpc; + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2); + enable_feature(Feature::dit, self.dit); + enable_feature(Feature::flagm, self.flagm); + enable_feature(Feature::flagm2, self.flagm2); + enable_feature(Feature::ssbs, self.ssbs); + enable_feature(Feature::sb, self.sb); + enable_feature(Feature::paca, self.paca); + enable_feature(Feature::pacg, self.pacg); + // enable_feature(Feature::pauth_lr, self.pauthlr); + enable_feature(Feature::dpb, self.dcpop); + enable_feature(Feature::dpb2, self.dcpodp); + enable_feature(Feature::rand, self.rng); + enable_feature(Feature::bti, self.bti); + enable_feature(Feature::mte, self.mte); + // jsconv requires float support + enable_feature(Feature::jsconv, self.jscvt && self.fp); + enable_feature(Feature::rdm, self.asimdrdm); + enable_feature(Feature::dotprod, self.asimddp); + enable_feature(Feature::frintts, self.frint); + + // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes + // separately. We ignore that distinction here. + enable_feature(Feature::i8mm, self.i8mm); + enable_feature(Feature::bf16, self.bf16); + + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // ASIMD extensions require ASIMD support: + enable_feature(Feature::fcma, self.fcma && asimd); + enable_feature(Feature::sve, self.sve && asimd); + + // SVE extensions require SVE & ASIMD + enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd); + enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd); + + // Cryptographic extensions require ASIMD + enable_feature(Feature::aes, self.aes && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + // SHA512/SHA3 require SHA1 & SHA256 + enable_feature( + Feature::sha3, + self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd, + ); + enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd); + + // SVE2 requires SVE + let sve2 = self.sve2 && self.sve && asimd; + enable_feature(Feature::sve2, sve2); + enable_feature(Feature::sve2p1, self.sve2p1 && sve2); + // SVE2 extensions require SVE2 and crypto features + enable_feature(Feature::sve2_aes, self.sveaes && self.svepmull && sve2 && self.aes); + enable_feature(Feature::sve2_sm4, self.svesm4 && sve2 && self.sm3 && self.sm4); + enable_feature( + Feature::sve2_sha3, + self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2, + ); + enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2); + enable_feature(Feature::sve_b16b16, self.bf16 && self.sveb16b16); + enable_feature(Feature::hbc, self.hbc); + enable_feature(Feature::mops, self.mops); + enable_feature(Feature::ecv, self.ecv); + enable_feature(Feature::lut, self.lut); + enable_feature(Feature::cssc, self.cssc); + enable_feature(Feature::fpmr, self.fpmr); + enable_feature(Feature::faminmax, self.faminmax); + let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16; + enable_feature(Feature::fp8, fp8); + let fp8fma = self.f8fma && fp8; + enable_feature(Feature::fp8fma, fp8fma); + let fp8dot4 = self.f8dp4 && fp8fma; + enable_feature(Feature::fp8dot4, fp8dot4); + enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4); + enable_feature(Feature::wfxt, self.wfxt); + let sme = self.sme && self.bf16; + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme_i16i64, self.smei16i64 && sme); + enable_feature(Feature::sme_f64f64, self.smef64f64 && sme); + enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2); + let sme2 = self.sme2 && sme; + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme2p1, self.sme2p1 && sme2); + enable_feature( + Feature::sme_b16b16, + sme2 && self.bf16 && self.sveb16b16 && self.smeb16b16, + ); + enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2); + enable_feature(Feature::sme_lutv2, self.smelutv2); + let sme_f8f32 = self.smef8f32 && sme2 && fp8; + enable_feature(Feature::sme_f8f32, sme_f8f32); + enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32); + let ssve_fp8fma = self.smesf8fma && sme2 && fp8; + enable_feature(Feature::ssve_fp8fma, ssve_fp8fma); + let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma; + enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4); + enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4); + } + value + } +} + +#[cfg(target_endian = "little")] +#[cfg(test)] +mod tests; diff --git a/library/std_detect/src/detect/os/linux/aarch64/tests.rs b/library/std_detect/src/detect/os/linux/aarch64/tests.rs new file mode 100644 index 00000000000..a3562f2fd93 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/aarch64/tests.rs @@ -0,0 +1,77 @@ +use super::*; + +#[cfg(feature = "std_detect_file_io")] +mod auxv_from_file { + use super::auxvec::auxv_from_file; + use super::*; + // The baseline hwcaps used in the (artificial) auxv test files. + fn baseline_hwcaps() -> AtHwcap { + AtHwcap { + fp: true, + asimd: true, + aes: true, + pmull: true, + sha1: true, + sha2: true, + crc32: true, + atomics: true, + fphp: true, + asimdhp: true, + asimdrdm: true, + lrcpc: true, + dcpop: true, + asimddp: true, + ssbs: true, + ..AtHwcap::default() + } + } + + #[test] + fn linux_empty_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_no_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_hwcap2_aarch64() { + let file = + concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-hwcap2-aarch64.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!( + AtHwcap::from(v), + AtHwcap { + // Some other HWCAP bits. + paca: true, + pacg: true, + // HWCAP2-only bits. + dcpodp: true, + frint: true, + rng: true, + bti: true, + mte: true, + ..baseline_hwcaps() + } + ); + } +} diff --git a/library/std_detect/src/detect/os/linux/arm.rs b/library/std_detect/src/detect/os/linux/arm.rs new file mode 100644 index 00000000000..bbb173227d0 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/arm.rs @@ -0,0 +1,34 @@ +//! Run-time feature detection for ARM on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::i8mm, bit::test(auxv.hwcap, 27)); + enable_feature(&mut value, Feature::dotprod, bit::test(auxv.hwcap, 24)); + enable_feature(&mut value, Feature::neon, bit::test(auxv.hwcap, 12)); + enable_feature(&mut value, Feature::pmull, bit::test(auxv.hwcap2, 1)); + enable_feature(&mut value, Feature::crc, bit::test(auxv.hwcap2, 4)); + enable_feature(&mut value, Feature::aes, bit::test(auxv.hwcap2, 0)); + // SHA2 requires SHA1 & SHA2 features + enable_feature( + &mut value, + Feature::sha2, + bit::test(auxv.hwcap2, 2) && bit::test(auxv.hwcap2, 3), + ); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/auxvec.rs b/library/std_detect/src/detect/os/linux/auxvec.rs new file mode 100644 index 00000000000..443caaaa186 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/auxvec.rs @@ -0,0 +1,234 @@ +//! Parses ELF auxiliary vectors. +#![allow(dead_code)] + +pub(crate) const AT_NULL: usize = 0; + +/// Key to access the CPU Hardware capabilities bitfield. +pub(crate) const AT_HWCAP: usize = 16; +/// Key to access the CPU Hardware capabilities 2 bitfield. +#[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", +))] +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub(crate) struct AuxVec { + pub hwcap: usize, + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For Linux, they are +/// defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// There is no perfect way of reading the auxiliary vector. +/// +/// - If the `std_detect_dlsym_getauxval` cargo feature is enabled, this will use +/// `getauxval` if its linked to the binary, and otherwise proceed to a fallback implementation. +/// When `std_detect_dlsym_getauxval` is disabled, this will assume that `getauxval` is +/// linked to the binary - if that is not the case the behavior is undefined. +/// - Otherwise, if the `std_detect_file_io` cargo feature is enabled, it will +/// try to read `/proc/self/auxv`. +/// - If that fails, this function returns an error. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// Note: The `std_detect_dlsym_getauxval` cargo feature is ignored on +/// `*-linux-{gnu,musl,ohos}*` and `*-android*` targets because we can safely assume `getauxval` +/// is linked to the binary. +/// - `*-linux-gnu*` targets ([since Rust 1.64](https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html)) +/// have glibc requirements higher than [glibc 2.16 that added `getauxval`](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html). +/// - `*-linux-musl*` targets ([at least since Rust 1.15](https://github.com/rust-lang/rust/blob/1.15.0/src/ci/docker/x86_64-musl/build-musl.sh#L15)) +/// use musl newer than [musl 1.1.0 that added `getauxval`](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197) +/// - `*-linux-ohos*` targets use a [fork of musl 1.2](https://gitee.com/openharmony/docs/blob/master/en/application-dev/reference/native-lib/musl.md) +/// - `*-android*` targets ([since Rust 1.68](https://blog.rust-lang.org/2023/01/09/android-ndk-update-r25.html)) +/// have the minimum supported API level higher than [Android 4.3 (API level 18) that added `getauxval`](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49). +/// +/// For more information about when `getauxval` is available check the great +/// [`auxv` crate documentation][auxv_docs]. +/// +/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h +/// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/ +pub(crate) fn auxv() -> Result<AuxVec, ()> { + // Try to call a getauxval function. + if let Ok(hwcap) = getauxval(AT_HWCAP) { + // Targets with only AT_HWCAP: + #[cfg(any( + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "mips", + target_arch = "mips64", + target_arch = "loongarch32", + target_arch = "loongarch64", + ))] + { + // Zero could indicate that no features were detected, but it's also used to indicate + // an error. In either case, try the fallback. + if hwcap != 0 { + return Ok(AuxVec { hwcap }); + } + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + // Zero could indicate that no features were detected, but it's also used to indicate + // an error. In particular, on many platforms AT_HWCAP2 will be legitimately zero, + // since it contains the most recent feature flags. Use the fallback only if no + // features were detected at all. + if hwcap != 0 || hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + + // Intentionnaly not used + let _ = hwcap; + } + + #[cfg(feature = "std_detect_file_io")] + { + // If calling getauxval fails, try to read the auxiliary vector from + // its file: + auxv_from_file("/proc/self/auxv").map_err(|_| ()) + } + #[cfg(not(feature = "std_detect_file_io"))] + { + Err(()) + } +} + +/// Tries to read the `key` from the auxiliary vector by calling the +/// `getauxval` function. If the function is not linked, this function return `Err`. +fn getauxval(key: usize) -> Result<usize, ()> { + type F = unsafe extern "C" fn(libc::c_ulong) -> libc::c_ulong; + cfg_if::cfg_if! { + if #[cfg(all( + feature = "std_detect_dlsym_getauxval", + not(all( + target_os = "linux", + any(target_env = "gnu", target_env = "musl", target_env = "ohos"), + )), + not(target_os = "android"), + ))] { + let ffi_getauxval: F = unsafe { + let ptr = libc::dlsym(libc::RTLD_DEFAULT, c"getauxval".as_ptr()); + if ptr.is_null() { + return Err(()); + } + core::mem::transmute(ptr) + }; + } else { + let ffi_getauxval: F = libc::getauxval; + } + } + Ok(unsafe { ffi_getauxval(key as libc::c_ulong) as usize }) +} + +/// Tries to read the auxiliary vector from the `file`. If this fails, this +/// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] +pub(super) fn auxv_from_file(file: &str) -> Result<AuxVec, alloc::string::String> { + let file = super::read_file(file)?; + auxv_from_file_bytes(&file) +} + +/// Read auxiliary vector from a slice of bytes. +#[cfg(feature = "std_detect_file_io")] +pub(super) fn auxv_from_file_bytes(bytes: &[u8]) -> Result<AuxVec, alloc::string::String> { + // See <https://github.com/torvalds/linux/blob/v5.15/include/uapi/linux/auxvec.h>. + // + // The auxiliary vector contains at most 34 (key,value) fields: from + // `AT_MINSIGSTKSZ` to `AT_NULL`, but its number may increase. + let len = bytes.len(); + let mut buf = alloc::vec![0_usize; 1 + len / core::mem::size_of::<usize>()]; + unsafe { + core::ptr::copy_nonoverlapping(bytes.as_ptr(), buf.as_mut_ptr() as *mut u8, len); + } + + auxv_from_buf(&buf) +} + +/// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this +/// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] +fn auxv_from_buf(buf: &[usize]) -> Result<AuxVec, alloc::string::String> { + // Targets with only AT_HWCAP: + #[cfg(any( + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "mips", + target_arch = "mips64", + target_arch = "loongarch32", + target_arch = "loongarch64", + ))] + { + for el in buf.chunks(2) { + match el[0] { + AT_NULL => break, + AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }), + _ => (), + } + } + } + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + let mut hwcap = None; + // For some platforms, AT_HWCAP2 was added recently, so let it default to zero. + let mut hwcap2 = 0; + for el in buf.chunks(2) { + match el[0] { + AT_NULL => break, + AT_HWCAP => hwcap = Some(el[1]), + AT_HWCAP2 => hwcap2 = el[1], + _ => (), + } + } + + if let Some(hwcap) = hwcap { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + // Suppress unused variable + let _ = buf; + Err(alloc::string::String::from("hwcap not found")) +} + +#[cfg(test)] +mod tests; diff --git a/library/std_detect/src/detect/os/linux/auxvec/tests.rs b/library/std_detect/src/detect/os/linux/auxvec/tests.rs new file mode 100644 index 00000000000..536615fa272 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/auxvec/tests.rs @@ -0,0 +1,109 @@ +use super::*; + +// FIXME: on mips/mips64 getauxval returns 0, and /proc/self/auxv +// does not always contain the AT_HWCAP key under qemu. +#[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", +))] +#[test] +fn auxv_crate() { + let v = auxv(); + if let Ok(hwcap) = getauxval(AT_HWCAP) { + let rt_hwcap = v.expect("failed to find hwcap key").hwcap; + assert_eq!(rt_hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + let rt_hwcap2 = v.expect("failed to find hwcap2 key").hwcap2; + assert_eq!(rt_hwcap2, hwcap2); + } + } +} + +#[test] +fn auxv_dump() { + if let Ok(auxvec) = auxv() { + println!("{:?}", auxvec); + } else { + println!("both getauxval() and reading /proc/self/auxv failed!"); + } +} + +#[cfg(feature = "std_detect_file_io")] +cfg_if::cfg_if! { + if #[cfg(target_arch = "arm")] { + // The tests below can be executed under qemu, where we do not have access to the test + // files on disk, so we need to embed them with `include_bytes!`. + #[test] + fn linux_rpi3() { + let auxv = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-rpi3.auxv")); + let v = auxv_from_file_bytes(auxv).unwrap(); + assert_eq!(v.hwcap, 4174038); + assert_eq!(v.hwcap2, 16); + } + + #[test] + fn linux_macos_vb() { + let auxv = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv")); + // The file contains HWCAP but not HWCAP2. In that case, we treat HWCAP2 as zero. + let v = auxv_from_file_bytes(auxv).unwrap(); + assert_eq!(v.hwcap, 126614527); + assert_eq!(v.hwcap2, 0); + } + } else if #[cfg(target_arch = "aarch64")] { + #[cfg(target_endian = "little")] + #[test] + fn linux_artificial_aarch64() { + let auxv = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-artificial-aarch64.auxv")); + let v = auxv_from_file_bytes(auxv).unwrap(); + assert_eq!(v.hwcap, 0x0123456789abcdef); + assert_eq!(v.hwcap2, 0x02468ace13579bdf); + } + #[cfg(target_endian = "little")] + #[test] + fn linux_no_hwcap2_aarch64() { + let auxv = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv")); + let v = auxv_from_file_bytes(auxv).unwrap(); + // An absent HWCAP2 is treated as zero, and does not prevent acceptance of HWCAP. + assert_ne!(v.hwcap, 0); + assert_eq!(v.hwcap2, 0); + } + } +} + +#[test] +#[cfg(feature = "std_detect_file_io")] +fn auxv_dump_procfs() { + if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") { + println!("{:?}", auxvec); + } else { + println!("reading /proc/self/auxv failed!"); + } +} + +#[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", +))] +#[test] +#[cfg(feature = "std_detect_file_io")] +fn auxv_crate_procfs() { + if let Ok(procfs_auxv) = auxv_from_file("/proc/self/auxv") { + assert_eq!(auxv().unwrap(), procfs_auxv); + } +} diff --git a/library/std_detect/src/detect/os/linux/loongarch.rs b/library/std_detect/src/detect/os/linux/loongarch.rs new file mode 100644 index 00000000000..e97fda11d08 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/loongarch.rs @@ -0,0 +1,61 @@ +//! Run-time feature detection for LoongArch on Linux. + +use core::arch::asm; + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, feature, enable| { + if enable { + value.set(feature as u32); + } + }; + + // The values are part of the platform-specific [cpucfg] + // + // [cpucfg]: LoongArch Reference Manual Volume 1: Basic Architecture v1.1 + let cpucfg2: usize; + unsafe { + asm!( + "cpucfg {}, {}", + out(reg) cpucfg2, in(reg) 2, + options(pure, nomem, preserves_flags, nostack) + ); + } + let cpucfg3: usize; + unsafe { + asm!( + "cpucfg {}, {}", + out(reg) cpucfg3, in(reg) 3, + options(pure, nomem, preserves_flags, nostack) + ); + } + enable_feature(&mut value, Feature::frecipe, bit::test(cpucfg2, 25)); + enable_feature(&mut value, Feature::div32, bit::test(cpucfg2, 26)); + enable_feature(&mut value, Feature::lam_bh, bit::test(cpucfg2, 27)); + enable_feature(&mut value, Feature::lamcas, bit::test(cpucfg2, 28)); + enable_feature(&mut value, Feature::scq, bit::test(cpucfg2, 30)); + enable_feature(&mut value, Feature::ld_seq_sa, bit::test(cpucfg3, 23)); + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/loongarch/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::f, bit::test(cpucfg2, 1) && bit::test(auxv.hwcap, 3)); + enable_feature(&mut value, Feature::d, bit::test(cpucfg2, 2) && bit::test(auxv.hwcap, 3)); + enable_feature(&mut value, Feature::lsx, bit::test(auxv.hwcap, 4)); + enable_feature(&mut value, Feature::lasx, bit::test(auxv.hwcap, 5)); + enable_feature( + &mut value, + Feature::lbt, + bit::test(auxv.hwcap, 10) && bit::test(auxv.hwcap, 11) && bit::test(auxv.hwcap, 12), + ); + enable_feature(&mut value, Feature::lvz, bit::test(auxv.hwcap, 9)); + enable_feature(&mut value, Feature::ual, bit::test(auxv.hwcap, 2)); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/mips.rs b/library/std_detect/src/detect/os/linux/mips.rs new file mode 100644 index 00000000000..0cfa8869887 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/mips.rs @@ -0,0 +1,23 @@ +//! Run-time feature detection for MIPS on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/mips/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1)); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/mod.rs b/library/std_detect/src/detect/os/linux/mod.rs new file mode 100644 index 00000000000..5ae2aaeab5b --- /dev/null +++ b/library/std_detect/src/detect/os/linux/mod.rs @@ -0,0 +1,69 @@ +//! Run-time feature detection on Linux +//! +#[cfg(feature = "std_detect_file_io")] +use alloc::vec::Vec; + +mod auxvec; + +#[cfg(feature = "std_detect_file_io")] +fn read_file(orig_path: &str) -> Result<Vec<u8>, alloc::string::String> { + use alloc::format; + + let mut path = Vec::from(orig_path.as_bytes()); + path.push(0); + + unsafe { + let file = libc::open(path.as_ptr() as *const libc::c_char, libc::O_RDONLY); + if file == -1 { + return Err(format!("Cannot open file at {orig_path}")); + } + + let mut data = Vec::new(); + loop { + data.reserve(4096); + let spare = data.spare_capacity_mut(); + match libc::read(file, spare.as_mut_ptr() as *mut _, spare.len()) { + -1 => { + libc::close(file); + return Err(format!("Error while reading from file at {orig_path}")); + } + 0 => break, + n => data.set_len(data.len() + n as usize), + } + } + + libc::close(file); + Ok(data) + } +} + +cfg_if::cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub(crate) use self::aarch64::detect_features; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub(crate) use self::arm::detect_features; + } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] { + mod riscv; + pub(crate) use self::riscv::detect_features; + } else if #[cfg(any(target_arch = "mips", target_arch = "mips64"))] { + mod mips; + pub(crate) use self::mips::detect_features; + } else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] { + mod powerpc; + pub(crate) use self::powerpc::detect_features; + } else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] { + mod loongarch; + pub(crate) use self::loongarch::detect_features; + } else if #[cfg(target_arch = "s390x")] { + mod s390x; + pub(crate) use self::s390x::detect_features; + } else { + use crate::detect::cache; + /// Performs run-time feature detection. + pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() + } + } +} diff --git a/library/std_detect/src/detect/os/linux/powerpc.rs b/library/std_detect/src/detect/os/linux/powerpc.rs new file mode 100644 index 00000000000..6a4f7e715d9 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/powerpc.rs @@ -0,0 +1,35 @@ +//! Run-time feature detection for PowerPC on Linux. + +use super::auxvec; +use crate::detect::{Feature, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/cputable.h][cputable] + // + // [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h + if let Ok(auxv) = auxvec::auxv() { + // note: the PowerPC values are the mask to do the test (instead of the + // index of the bit to test like in ARM and Aarch64) + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + let power8_features = auxv.hwcap2 & 0x80000000 != 0; + enable_feature(&mut value, Feature::power8, power8_features); + enable_feature(&mut value, Feature::power8_altivec, power8_features); + enable_feature(&mut value, Feature::power8_crypto, power8_features); + enable_feature(&mut value, Feature::power8_vector, power8_features); + let power9_features = auxv.hwcap2 & 0x00800000 != 0; + enable_feature(&mut value, Feature::power9, power9_features); + enable_feature(&mut value, Feature::power9_altivec, power9_features); + enable_feature(&mut value, Feature::power9_vector, power9_features); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/riscv.rs b/library/std_detect/src/detect/os/linux/riscv.rs new file mode 100644 index 00000000000..dbb3664890e --- /dev/null +++ b/library/std_detect/src/detect/os/linux/riscv.rs @@ -0,0 +1,321 @@ +//! Run-time feature detection for RISC-V on Linux. +//! +//! On RISC-V, detection using auxv only supports single-letter extensions. +//! So, we use riscv_hwprobe that supports multi-letter extensions if available. +//! <https://www.kernel.org/doc/html/latest/arch/riscv/hwprobe.html> + +use core::ptr; + +use super::super::riscv::imply_features; +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +// See <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/prctl.h?h=v6.15> +// for runtime status query constants. +const PR_RISCV_V_GET_CONTROL: libc::c_int = 70; +const PR_RISCV_V_VSTATE_CTRL_ON: libc::c_int = 2; +const PR_RISCV_V_VSTATE_CTRL_CUR_MASK: libc::c_int = 3; + +// See <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/uapi/asm/hwprobe.h?h=v6.15> +// for riscv_hwprobe struct and hardware probing constants. + +#[repr(C)] +struct riscv_hwprobe { + key: i64, + value: u64, +} + +impl riscv_hwprobe { + // key is overwritten to -1 if not supported by riscv_hwprobe syscall. + pub fn get(&self) -> Option<u64> { + (self.key != -1).then_some(self.value) + } +} + +#[allow(non_upper_case_globals)] +const __NR_riscv_hwprobe: libc::c_long = 258; + +const RISCV_HWPROBE_KEY_BASE_BEHAVIOR: i64 = 3; +const RISCV_HWPROBE_BASE_BEHAVIOR_IMA: u64 = 1 << 0; + +const RISCV_HWPROBE_KEY_IMA_EXT_0: i64 = 4; +const RISCV_HWPROBE_IMA_FD: u64 = 1 << 0; +const RISCV_HWPROBE_IMA_C: u64 = 1 << 1; +const RISCV_HWPROBE_IMA_V: u64 = 1 << 2; +const RISCV_HWPROBE_EXT_ZBA: u64 = 1 << 3; +const RISCV_HWPROBE_EXT_ZBB: u64 = 1 << 4; +const RISCV_HWPROBE_EXT_ZBS: u64 = 1 << 5; +const RISCV_HWPROBE_EXT_ZICBOZ: u64 = 1 << 6; +const RISCV_HWPROBE_EXT_ZBC: u64 = 1 << 7; +const RISCV_HWPROBE_EXT_ZBKB: u64 = 1 << 8; +const RISCV_HWPROBE_EXT_ZBKC: u64 = 1 << 9; +const RISCV_HWPROBE_EXT_ZBKX: u64 = 1 << 10; +const RISCV_HWPROBE_EXT_ZKND: u64 = 1 << 11; +const RISCV_HWPROBE_EXT_ZKNE: u64 = 1 << 12; +const RISCV_HWPROBE_EXT_ZKNH: u64 = 1 << 13; +const RISCV_HWPROBE_EXT_ZKSED: u64 = 1 << 14; +const RISCV_HWPROBE_EXT_ZKSH: u64 = 1 << 15; +const RISCV_HWPROBE_EXT_ZKT: u64 = 1 << 16; +const RISCV_HWPROBE_EXT_ZVBB: u64 = 1 << 17; +const RISCV_HWPROBE_EXT_ZVBC: u64 = 1 << 18; +const RISCV_HWPROBE_EXT_ZVKB: u64 = 1 << 19; +const RISCV_HWPROBE_EXT_ZVKG: u64 = 1 << 20; +const RISCV_HWPROBE_EXT_ZVKNED: u64 = 1 << 21; +const RISCV_HWPROBE_EXT_ZVKNHA: u64 = 1 << 22; +const RISCV_HWPROBE_EXT_ZVKNHB: u64 = 1 << 23; +const RISCV_HWPROBE_EXT_ZVKSED: u64 = 1 << 24; +const RISCV_HWPROBE_EXT_ZVKSH: u64 = 1 << 25; +const RISCV_HWPROBE_EXT_ZVKT: u64 = 1 << 26; +const RISCV_HWPROBE_EXT_ZFH: u64 = 1 << 27; +const RISCV_HWPROBE_EXT_ZFHMIN: u64 = 1 << 28; +const RISCV_HWPROBE_EXT_ZIHINTNTL: u64 = 1 << 29; +const RISCV_HWPROBE_EXT_ZVFH: u64 = 1 << 30; +const RISCV_HWPROBE_EXT_ZVFHMIN: u64 = 1 << 31; +const RISCV_HWPROBE_EXT_ZFA: u64 = 1 << 32; +const RISCV_HWPROBE_EXT_ZTSO: u64 = 1 << 33; +const RISCV_HWPROBE_EXT_ZACAS: u64 = 1 << 34; +const RISCV_HWPROBE_EXT_ZICOND: u64 = 1 << 35; +const RISCV_HWPROBE_EXT_ZIHINTPAUSE: u64 = 1 << 36; +const RISCV_HWPROBE_EXT_ZVE32X: u64 = 1 << 37; +const RISCV_HWPROBE_EXT_ZVE32F: u64 = 1 << 38; +const RISCV_HWPROBE_EXT_ZVE64X: u64 = 1 << 39; +const RISCV_HWPROBE_EXT_ZVE64F: u64 = 1 << 40; +const RISCV_HWPROBE_EXT_ZVE64D: u64 = 1 << 41; +const RISCV_HWPROBE_EXT_ZIMOP: u64 = 1 << 42; +const RISCV_HWPROBE_EXT_ZCA: u64 = 1 << 43; +const RISCV_HWPROBE_EXT_ZCB: u64 = 1 << 44; +const RISCV_HWPROBE_EXT_ZCD: u64 = 1 << 45; +const RISCV_HWPROBE_EXT_ZCF: u64 = 1 << 46; +const RISCV_HWPROBE_EXT_ZCMOP: u64 = 1 << 47; +const RISCV_HWPROBE_EXT_ZAWRS: u64 = 1 << 48; +// Excluded because it only reports the existence of `prctl`-based pointer masking control. +// const RISCV_HWPROBE_EXT_SUPM: u64 = 1 << 49; +const RISCV_HWPROBE_EXT_ZICNTR: u64 = 1 << 50; +const RISCV_HWPROBE_EXT_ZIHPM: u64 = 1 << 51; +const RISCV_HWPROBE_EXT_ZFBFMIN: u64 = 1 << 52; +const RISCV_HWPROBE_EXT_ZVFBFMIN: u64 = 1 << 53; +const RISCV_HWPROBE_EXT_ZVFBFWMA: u64 = 1 << 54; +const RISCV_HWPROBE_EXT_ZICBOM: u64 = 1 << 55; +const RISCV_HWPROBE_EXT_ZAAMO: u64 = 1 << 56; +const RISCV_HWPROBE_EXT_ZALRSC: u64 = 1 << 57; + +const RISCV_HWPROBE_KEY_CPUPERF_0: i64 = 5; +const RISCV_HWPROBE_MISALIGNED_FAST: u64 = 3; +const RISCV_HWPROBE_MISALIGNED_MASK: u64 = 7; + +const RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: i64 = 9; +const RISCV_HWPROBE_MISALIGNED_SCALAR_FAST: u64 = 3; + +const RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: i64 = 10; +const RISCV_HWPROBE_MISALIGNED_VECTOR_FAST: u64 = 3; + +// syscall returns an unsupported error if riscv_hwprobe is not supported, +// so we can safely use this function on older versions of Linux. +fn _riscv_hwprobe(out: &mut [riscv_hwprobe]) -> bool { + unsafe fn __riscv_hwprobe( + pairs: *mut riscv_hwprobe, + pair_count: libc::size_t, + cpu_set_size: libc::size_t, + cpus: *mut libc::c_ulong, + flags: libc::c_uint, + ) -> libc::c_long { + unsafe { libc::syscall(__NR_riscv_hwprobe, pairs, pair_count, cpu_set_size, cpus, flags) } + } + + unsafe { __riscv_hwprobe(out.as_mut_ptr(), out.len(), 0, ptr::null_mut(), 0) == 0 } +} + +/// Read list of supported features from (1) the auxiliary vector +/// and (2) the results of `riscv_hwprobe` and `prctl` system calls. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let mut enable_feature = |feature, enable| { + if enable { + value.set(feature as u32); + } + }; + + // Use auxiliary vector to enable single-letter ISA extensions. + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/uapi/asm/hwcap.h?h=v6.15 + let auxv = auxvec::auxv().expect("read auxvec"); // should not fail on RISC-V platform + let mut has_i = bit::test(auxv.hwcap, (b'i' - b'a').into()); + #[allow(clippy::eq_op)] + enable_feature(Feature::a, bit::test(auxv.hwcap, (b'a' - b'a').into())); + enable_feature(Feature::c, bit::test(auxv.hwcap, (b'c' - b'a').into())); + enable_feature(Feature::d, bit::test(auxv.hwcap, (b'd' - b'a').into())); + enable_feature(Feature::f, bit::test(auxv.hwcap, (b'f' - b'a').into())); + enable_feature(Feature::m, bit::test(auxv.hwcap, (b'm' - b'a').into())); + let has_v = bit::test(auxv.hwcap, (b'v' - b'a').into()); + let mut is_v_set = false; + + // Use riscv_hwprobe syscall to query more extensions and + // performance-related capabilities. + 'hwprobe: { + macro_rules! init { + { $($name: ident : $key: expr),* $(,)? } => { + #[repr(usize)] + enum Indices { $($name),* } + let mut t = [$(riscv_hwprobe { key: $key, value: 0 }),*]; + macro_rules! data_mut { () => { &mut t } } + macro_rules! query { [$idx: ident] => { t[Indices::$idx as usize].get() } } + } + } + init! { + BaseBehavior: RISCV_HWPROBE_KEY_BASE_BEHAVIOR, + Extensions: RISCV_HWPROBE_KEY_IMA_EXT_0, + MisalignedScalarPerf: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF, + MisalignedVectorPerf: RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF, + MisalignedScalarPerfFallback: RISCV_HWPROBE_KEY_CPUPERF_0, + }; + if !_riscv_hwprobe(data_mut!()) { + break 'hwprobe; + } + + // Query scalar misaligned behavior. + if let Some(value) = query![MisalignedScalarPerf] { + enable_feature( + Feature::unaligned_scalar_mem, + value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST, + ); + } else if let Some(value) = query![MisalignedScalarPerfFallback] { + // Deprecated method for fallback + enable_feature( + Feature::unaligned_scalar_mem, + value & RISCV_HWPROBE_MISALIGNED_MASK == RISCV_HWPROBE_MISALIGNED_FAST, + ); + } + + // Query vector misaligned behavior. + if let Some(value) = query![MisalignedVectorPerf] { + enable_feature( + Feature::unaligned_vector_mem, + value == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST, + ); + } + + // Query whether "I" base and extensions "M" and "A" (as in the ISA + // manual version 2.2) are enabled. "I" base at that time corresponds + // to "I", "Zicsr", "Zicntr" and "Zifencei" (as in the ISA manual version + // 20240411). + // This is a current requirement of + // `RISCV_HWPROBE_KEY_IMA_EXT_0`-based tests. + if query![BaseBehavior].is_none_or(|value| value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA == 0) { + break 'hwprobe; + } + has_i = true; + enable_feature(Feature::zicsr, true); + enable_feature(Feature::zicntr, true); + enable_feature(Feature::zifencei, true); + enable_feature(Feature::m, true); + enable_feature(Feature::a, true); + + // Enable features based on `RISCV_HWPROBE_KEY_IMA_EXT_0`. + let Some(ima_ext_0) = query![Extensions] else { + break 'hwprobe; + }; + let test = |mask| (ima_ext_0 & mask) != 0; + + enable_feature(Feature::d, test(RISCV_HWPROBE_IMA_FD)); // F is implied. + enable_feature(Feature::c, test(RISCV_HWPROBE_IMA_C)); + + enable_feature(Feature::zicntr, test(RISCV_HWPROBE_EXT_ZICNTR)); + enable_feature(Feature::zihpm, test(RISCV_HWPROBE_EXT_ZIHPM)); + + enable_feature(Feature::zihintntl, test(RISCV_HWPROBE_EXT_ZIHINTNTL)); + enable_feature(Feature::zihintpause, test(RISCV_HWPROBE_EXT_ZIHINTPAUSE)); + enable_feature(Feature::zimop, test(RISCV_HWPROBE_EXT_ZIMOP)); + enable_feature(Feature::zicbom, test(RISCV_HWPROBE_EXT_ZICBOM)); + enable_feature(Feature::zicboz, test(RISCV_HWPROBE_EXT_ZICBOZ)); + enable_feature(Feature::zicond, test(RISCV_HWPROBE_EXT_ZICOND)); + + enable_feature(Feature::zalrsc, test(RISCV_HWPROBE_EXT_ZALRSC)); + enable_feature(Feature::zaamo, test(RISCV_HWPROBE_EXT_ZAAMO)); + enable_feature(Feature::zawrs, test(RISCV_HWPROBE_EXT_ZAWRS)); + enable_feature(Feature::zacas, test(RISCV_HWPROBE_EXT_ZACAS)); + enable_feature(Feature::ztso, test(RISCV_HWPROBE_EXT_ZTSO)); + + enable_feature(Feature::zba, test(RISCV_HWPROBE_EXT_ZBA)); + enable_feature(Feature::zbb, test(RISCV_HWPROBE_EXT_ZBB)); + enable_feature(Feature::zbs, test(RISCV_HWPROBE_EXT_ZBS)); + enable_feature(Feature::zbc, test(RISCV_HWPROBE_EXT_ZBC)); + + enable_feature(Feature::zbkb, test(RISCV_HWPROBE_EXT_ZBKB)); + enable_feature(Feature::zbkc, test(RISCV_HWPROBE_EXT_ZBKC)); + enable_feature(Feature::zbkx, test(RISCV_HWPROBE_EXT_ZBKX)); + enable_feature(Feature::zknd, test(RISCV_HWPROBE_EXT_ZKND)); + enable_feature(Feature::zkne, test(RISCV_HWPROBE_EXT_ZKNE)); + enable_feature(Feature::zknh, test(RISCV_HWPROBE_EXT_ZKNH)); + enable_feature(Feature::zksed, test(RISCV_HWPROBE_EXT_ZKSED)); + enable_feature(Feature::zksh, test(RISCV_HWPROBE_EXT_ZKSH)); + enable_feature(Feature::zkt, test(RISCV_HWPROBE_EXT_ZKT)); + + enable_feature(Feature::zcmop, test(RISCV_HWPROBE_EXT_ZCMOP)); + enable_feature(Feature::zca, test(RISCV_HWPROBE_EXT_ZCA)); + enable_feature(Feature::zcf, test(RISCV_HWPROBE_EXT_ZCF)); + enable_feature(Feature::zcd, test(RISCV_HWPROBE_EXT_ZCD)); + enable_feature(Feature::zcb, test(RISCV_HWPROBE_EXT_ZCB)); + + enable_feature(Feature::zfh, test(RISCV_HWPROBE_EXT_ZFH)); + enable_feature(Feature::zfhmin, test(RISCV_HWPROBE_EXT_ZFHMIN)); + enable_feature(Feature::zfa, test(RISCV_HWPROBE_EXT_ZFA)); + enable_feature(Feature::zfbfmin, test(RISCV_HWPROBE_EXT_ZFBFMIN)); + + // Use prctl (if any) to determine whether the vector extension + // is enabled on the current thread (assuming the entire process + // share the same status). If prctl fails (e.g. QEMU userland emulator + // as of version 9.2.3), use auxiliary vector to retrieve the default + // vector status on the process startup. + let has_vectors = { + let v_status = unsafe { libc::prctl(PR_RISCV_V_GET_CONTROL) }; + if v_status >= 0 { + (v_status & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) == PR_RISCV_V_VSTATE_CTRL_ON + } else { + has_v + } + }; + if has_vectors { + enable_feature(Feature::v, test(RISCV_HWPROBE_IMA_V)); + enable_feature(Feature::zve32x, test(RISCV_HWPROBE_EXT_ZVE32X)); + enable_feature(Feature::zve32f, test(RISCV_HWPROBE_EXT_ZVE32F)); + enable_feature(Feature::zve64x, test(RISCV_HWPROBE_EXT_ZVE64X)); + enable_feature(Feature::zve64f, test(RISCV_HWPROBE_EXT_ZVE64F)); + enable_feature(Feature::zve64d, test(RISCV_HWPROBE_EXT_ZVE64D)); + + enable_feature(Feature::zvbb, test(RISCV_HWPROBE_EXT_ZVBB)); + enable_feature(Feature::zvbc, test(RISCV_HWPROBE_EXT_ZVBC)); + enable_feature(Feature::zvkb, test(RISCV_HWPROBE_EXT_ZVKB)); + enable_feature(Feature::zvkg, test(RISCV_HWPROBE_EXT_ZVKG)); + enable_feature(Feature::zvkned, test(RISCV_HWPROBE_EXT_ZVKNED)); + enable_feature(Feature::zvknha, test(RISCV_HWPROBE_EXT_ZVKNHA)); + enable_feature(Feature::zvknhb, test(RISCV_HWPROBE_EXT_ZVKNHB)); + enable_feature(Feature::zvksed, test(RISCV_HWPROBE_EXT_ZVKSED)); + enable_feature(Feature::zvksh, test(RISCV_HWPROBE_EXT_ZVKSH)); + enable_feature(Feature::zvkt, test(RISCV_HWPROBE_EXT_ZVKT)); + + enable_feature(Feature::zvfh, test(RISCV_HWPROBE_EXT_ZVFH)); + enable_feature(Feature::zvfhmin, test(RISCV_HWPROBE_EXT_ZVFHMIN)); + enable_feature(Feature::zvfbfmin, test(RISCV_HWPROBE_EXT_ZVFBFMIN)); + enable_feature(Feature::zvfbfwma, test(RISCV_HWPROBE_EXT_ZVFBFWMA)); + } + is_v_set = true; + }; + + // Set V purely depending on the auxiliary vector + // only if no fine-grained vector extension detection is available. + if !is_v_set { + enable_feature(Feature::v, has_v); + } + + // Handle base ISA. + // If future RV128I is supported, implement with `enable_feature` here. + // Note that we should use `target_arch` instead of `target_pointer_width` + // to avoid misdetection caused by experimental ABIs such as RV64ILP32. + #[cfg(target_arch = "riscv64")] + enable_feature(Feature::rv64i, has_i); + #[cfg(target_arch = "riscv32")] + enable_feature(Feature::rv32i, has_i); + + imply_features(value) +} diff --git a/library/std_detect/src/detect/os/linux/s390x.rs b/library/std_detect/src/detect/os/linux/s390x.rs new file mode 100644 index 00000000000..9b53f526d61 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/s390x.rs @@ -0,0 +1,152 @@ +//! Run-time feature detection for s390x on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector +pub(crate) fn detect_features() -> cache::Initializer { + let opt_hwcap: Option<AtHwcap> = auxvec::auxv().ok().map(Into::into); + let facilities = ExtendedFacilityList::new(); + cache(opt_hwcap, facilities) +} + +#[derive(Debug, Default, PartialEq)] +struct AtHwcap { + esan3: bool, + zarch: bool, + stfle: bool, + msa: bool, + ldisp: bool, + eimm: bool, + dfp: bool, + hpage: bool, + etf3eh: bool, + high_gprs: bool, + te: bool, + vxrs: bool, + vxrs_bcd: bool, + vxrs_ext: bool, + gs: bool, + vxrs_ext2: bool, + vxrs_pde: bool, + sort: bool, + dflt: bool, + vxrs_pde2: bool, + nnpa: bool, + pci_mio: bool, + sie: bool, +} + +impl From<auxvec::AuxVec> for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. + fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + esan3: bit::test(auxv.hwcap, 0), + zarch: bit::test(auxv.hwcap, 1), + stfle: bit::test(auxv.hwcap, 2), + msa: bit::test(auxv.hwcap, 3), + ldisp: bit::test(auxv.hwcap, 4), + eimm: bit::test(auxv.hwcap, 5), + dfp: bit::test(auxv.hwcap, 6), + hpage: bit::test(auxv.hwcap, 7), + etf3eh: bit::test(auxv.hwcap, 8), + high_gprs: bit::test(auxv.hwcap, 9), + te: bit::test(auxv.hwcap, 10), + vxrs: bit::test(auxv.hwcap, 11), + vxrs_bcd: bit::test(auxv.hwcap, 12), + vxrs_ext: bit::test(auxv.hwcap, 13), + gs: bit::test(auxv.hwcap, 14), + vxrs_ext2: bit::test(auxv.hwcap, 15), + vxrs_pde: bit::test(auxv.hwcap, 16), + sort: bit::test(auxv.hwcap, 17), + dflt: bit::test(auxv.hwcap, 18), + vxrs_pde2: bit::test(auxv.hwcap, 19), + nnpa: bit::test(auxv.hwcap, 20), + pci_mio: bit::test(auxv.hwcap, 21), + sie: bit::test(auxv.hwcap, 22), + } + } +} + +struct ExtendedFacilityList([u64; 4]); + +impl ExtendedFacilityList { + fn new() -> Self { + let mut result: [u64; 4] = [0; 4]; + // SAFETY: rust/llvm only support s390x version with the `stfle` instruction. + unsafe { + core::arch::asm!( + // equivalently ".insn s, 0xb2b00000, 0({1})", + "stfle 0({})", + in(reg_addr) result.as_mut_ptr() , + inout("r0") result.len() as u64 - 1 => _, + options(nostack) + ); + } + Self(result) + } + + const fn get_bit(&self, n: usize) -> bool { + // NOTE: bits are numbered from the left. + self.0[n / 64] & (1 << (63 - (n % 64))) != 0 + } +} + +/// Initializes the cache from the feature bits. +/// +/// These values are part of the platform-specific [asm/elf.h][kernel], and are a selection of the +/// fields found in the [Facility Indications]. +/// +/// [Facility Indications]: https://www.ibm.com/support/pages/sites/default/files/2021-05/SA22-7871-10.pdf#page=63 +/// [kernel]: https://github.com/torvalds/linux/blob/b62cef9a5c673f1b8083159f5dc03c1c5daced2f/arch/s390/include/asm/elf.h#L129 +fn cache(hwcap: Option<AtHwcap>, facilities: ExtendedFacilityList) -> cache::Initializer { + let mut value = cache::Initializer::default(); + + { + let mut enable_if_set = |bit_index, f| { + if facilities.get_bit(bit_index) { + value.set(f as u32); + } + }; + + // We use HWCAP for `vector` because it requires both hardware and kernel support. + if let Some(AtHwcap { vxrs: true, .. }) = hwcap { + // vector and related + + enable_if_set(129, Feature::vector); + + enable_if_set(135, Feature::vector_enhancements_1); + enable_if_set(148, Feature::vector_enhancements_2); + enable_if_set(198, Feature::vector_enhancements_3); + + enable_if_set(134, Feature::vector_packed_decimal); + enable_if_set(152, Feature::vector_packed_decimal_enhancement); + enable_if_set(192, Feature::vector_packed_decimal_enhancement_2); + enable_if_set(199, Feature::vector_packed_decimal_enhancement_3); + + enable_if_set(165, Feature::nnp_assist); + } + + // others + + enable_if_set(76, Feature::message_security_assist_extension3); + enable_if_set(77, Feature::message_security_assist_extension4); + enable_if_set(57, Feature::message_security_assist_extension5); + enable_if_set(146, Feature::message_security_assist_extension8); + enable_if_set(155, Feature::message_security_assist_extension9); + enable_if_set(86, Feature::message_security_assist_extension12); + + enable_if_set(58, Feature::miscellaneous_extensions_2); + enable_if_set(61, Feature::miscellaneous_extensions_3); + enable_if_set(84, Feature::miscellaneous_extensions_4); + + enable_if_set(45, Feature::high_word); + enable_if_set(73, Feature::transactional_execution); + enable_if_set(133, Feature::guarded_storage); + enable_if_set(150, Feature::enhanced_sort); + enable_if_set(151, Feature::deflate_conversion); + enable_if_set(201, Feature::concurrent_functions); + } + + value +} diff --git a/library/std_detect/src/detect/os/openbsd/aarch64.rs b/library/std_detect/src/detect/os/openbsd/aarch64.rs new file mode 100644 index 00000000000..2fae47b05c4 --- /dev/null +++ b/library/std_detect/src/detect/os/openbsd/aarch64.rs @@ -0,0 +1,57 @@ +//! Run-time feature detection for Aarch64 on OpenBSD. +//! +//! OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl. +//! https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 +//! https://github.com/golang/go/commit/cd54ef1f61945459486e9eea2f016d99ef1da925 + +use core::mem::MaybeUninit; +use core::ptr; + +use crate::detect::cache; + +// Defined in machine/cpu.h. +// https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/arch/arm64/include/cpu.h#L25-L40 +const CPU_ID_AA64ISAR0: libc::c_int = 2; +const CPU_ID_AA64ISAR1: libc::c_int = 3; +const CPU_ID_AA64MMFR2: libc::c_int = 7; +const CPU_ID_AA64PFR0: libc::c_int = 8; + +/// Try to read the features from the system registers. +pub(crate) fn detect_features() -> cache::Initializer { + // ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 are supported on OpenBSD 7.1+. + // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 + // Others are supported on OpenBSD 7.3+. + // https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c + // sysctl returns an unsupported error if operation is not supported, + // so we can safely use this function on older versions of OpenBSD. + let aa64isar0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR0]).unwrap_or(0); + let aa64isar1 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR1]).unwrap_or(0); + let aa64mmfr2 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64MMFR2]).unwrap_or(0); + // Do not use unwrap_or(0) because in fp and asimd fields, 0 indicates that + // the feature is available. + let aa64pfr0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64PFR0]); + + super::aarch64::parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, aa64pfr0) +} + +#[inline] +fn sysctl64(mib: &[libc::c_int]) -> Option<u64> { + const OUT_LEN: libc::size_t = core::mem::size_of::<u64>(); + let mut out = MaybeUninit::<u64>::uninit(); + let mut out_len = OUT_LEN; + let res = unsafe { + libc::sysctl( + mib.as_ptr(), + mib.len() as libc::c_uint, + out.as_mut_ptr() as *mut libc::c_void, + &mut out_len, + ptr::null_mut(), + 0, + ) + }; + if res == -1 || out_len != OUT_LEN { + return None; + } + // SAFETY: we've checked that sysctl was successful and `out` was filled. + Some(unsafe { out.assume_init() }) +} diff --git a/library/std_detect/src/detect/os/other.rs b/library/std_detect/src/detect/os/other.rs new file mode 100644 index 00000000000..091fafc4ebf --- /dev/null +++ b/library/std_detect/src/detect/os/other.rs @@ -0,0 +1,8 @@ +//! Other operating systems + +use crate::detect::cache; + +#[allow(dead_code)] +pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() +} diff --git a/library/std_detect/src/detect/os/riscv.rs b/library/std_detect/src/detect/os/riscv.rs new file mode 100644 index 00000000000..dc9a4036d86 --- /dev/null +++ b/library/std_detect/src/detect/os/riscv.rs @@ -0,0 +1,139 @@ +//! Run-time feature detection utility for RISC-V. +//! +//! On RISC-V, full feature detection needs a help of one or more +//! feature detection mechanisms (usually provided by the operating system). +//! +//! RISC-V architecture defines many extensions and some have dependency to others. +//! More importantly, some of them cannot be enabled without resolving such +//! dependencies due to limited set of features that such mechanisms provide. +//! +//! This module provides an OS-independent utility to process such relations +//! between RISC-V extensions. + +use crate::detect::{Feature, cache}; + +/// Imply features by the given set of enabled features. +/// +/// Note that it does not perform any consistency checks including existence of +/// conflicting extensions and/or complicated requirements. Eliminating such +/// inconsistencies is the responsibility of the feature detection logic and +/// its provider(s). +pub(crate) fn imply_features(mut value: cache::Initializer) -> cache::Initializer { + loop { + // Check convergence of the feature flags later. + let prev = value; + + // Expect that the optimizer turns repeated operations into + // a fewer number of bit-manipulation operations. + macro_rules! imply { + // Regular implication: + // A1 => (B1[, B2...]), A2 => (B1[, B2...]) and so on. + ($($from: ident)|+ => $($to: ident)&+) => { + if [$(Feature::$from as u32),+].iter().any(|&x| value.test(x)) { + $( + value.set(Feature::$to as u32); + )+ + } + }; + // Implication with multiple requirements: + // A1 && A2 ... => (B1[, B2...]). + ($($from: ident)&+ => $($to: ident)&+) => { + if [$(Feature::$from as u32),+].iter().all(|&x| value.test(x)) { + $( + value.set(Feature::$to as u32); + )+ + } + }; + } + macro_rules! group { + ($group: ident == $($member: ident)&+) => { + // Forward implication as defined in the specifications. + imply!($group => $($member)&+); + // Reverse implication to "group extension" from its members. + // This is not a part of specifications but convenient for + // feature detection and implemented in e.g. LLVM. + imply!($($member)&+ => $group); + }; + } + + /* + If a dependency/implication is not explicitly stated in the + specification, it is denoted as a comment as follows: + "defined as subset": + The latter extension is described as a subset of the former + (but the evidence is weak). + "functional": + The former extension is functionally a superset of the latter + (no direct references though). + */ + + imply!(zvbb => zvkb); + + // Certain set of vector cryptography extensions form a group. + group!(zvkn == zvkned & zvknhb & zvkb & zvkt); + group!(zvknc == zvkn & zvbc); + group!(zvkng == zvkn & zvkg); + group!(zvks == zvksed & zvksh & zvkb & zvkt); + group!(zvksc == zvks & zvbc); + group!(zvksg == zvks & zvkg); + + imply!(zvknhb => zvknha); // functional + + // For vector cryptography, Zvknhb and Zvbc require integer arithmetic + // with EEW=64 (Zve64x) while others not depending on them + // require EEW=32 (Zve32x). + imply!(zvknhb | zvbc => zve64x); + imply!(zvbb | zvkb | zvkg | zvkned | zvknha | zvksed | zvksh => zve32x); + + imply!(zbc => zbkc); // defined as subset + group!(zkn == zbkb & zbkc & zbkx & zkne & zknd & zknh); + group!(zks == zbkb & zbkc & zbkx & zksed & zksh); + group!(zk == zkn & zkr & zkt); + + imply!(zacas => zaamo); + group!(a == zalrsc & zaamo); + + group!(b == zba & zbb & zbs); + + imply!(zcf => zca & f); + imply!(zcd => zca & d); + imply!(zcmop | zcb => zca); + + imply!(zhinx => zhinxmin); + imply!(zdinx | zhinxmin => zfinx); + + imply!(zvfh => zvfhmin); // functional + imply!(zvfh => zve32f & zfhmin); + imply!(zvfhmin => zve32f); + imply!(zvfbfwma => zvfbfmin & zfbfmin); + imply!(zvfbfmin => zve32f); + + imply!(v => zve64d); + imply!(zve64d => zve64f & d); + imply!(zve64f => zve64x & zve32f); + imply!(zve64x => zve32x); + imply!(zve32f => zve32x & f); + + imply!(zfh => zfhmin); + imply!(q => d); + imply!(d | zfhmin | zfa => f); + imply!(zfbfmin => f); // and some of (not all) "Zfh" instructions. + + // Relatively complex implication rules from the "C" extension. + imply!(c => zca); + imply!(c & d => zcd); + #[cfg(target_arch = "riscv32")] + imply!(c & f => zcf); + + imply!(zicntr | zihpm | f | zfinx | zve32x => zicsr); + + // Loop until the feature flags converge. + if prev == value { + return value; + } + } +} + +#[cfg(test)] +#[path = "riscv/tests.rs"] +mod tests; diff --git a/library/std_detect/src/detect/os/riscv/tests.rs b/library/std_detect/src/detect/os/riscv/tests.rs new file mode 100644 index 00000000000..99a81dee05a --- /dev/null +++ b/library/std_detect/src/detect/os/riscv/tests.rs @@ -0,0 +1,64 @@ +use super::*; + +#[test] +fn simple_direct() { + let mut value = cache::Initializer::default(); + value.set(Feature::f as u32); + // F (and other extensions with CSRs) -> Zicsr + assert!(imply_features(value).test(Feature::zicsr as u32)); +} + +#[test] +fn simple_indirect() { + let mut value = cache::Initializer::default(); + value.set(Feature::q as u32); + // Q -> D, D -> F, F -> Zicsr + assert!(imply_features(value).test(Feature::zicsr as u32)); +} + +#[test] +fn complex_zcd() { + let mut value = cache::Initializer::default(); + // C & D -> Zcd + value.set(Feature::c as u32); + assert!(!imply_features(value).test(Feature::zcd as u32)); + value.set(Feature::d as u32); + assert!(imply_features(value).test(Feature::zcd as u32)); +} + +#[test] +fn group_simple_forward() { + let mut value = cache::Initializer::default(); + // A -> Zalrsc & Zaamo (forward implication) + value.set(Feature::a as u32); + let value = imply_features(value); + assert!(value.test(Feature::zalrsc as u32)); + assert!(value.test(Feature::zaamo as u32)); +} + +#[test] +fn group_simple_backward() { + let mut value = cache::Initializer::default(); + // Zalrsc & Zaamo -> A (reverse implication) + value.set(Feature::zalrsc as u32); + value.set(Feature::zaamo as u32); + assert!(imply_features(value).test(Feature::a as u32)); +} + +#[test] +fn group_complex_convergence() { + let mut value = cache::Initializer::default(); + // Needs 3 iterations to converge + // (and 4th iteration for convergence checking): + // 1. [Zvksc] -> Zvks & Zvbc + // 2. Zvks -> Zvksed & Zvksh & Zvkb & Zvkt + // 3a. [Zvkned] & [Zvknhb] & [Zvkb] & Zvkt -> {Zvkn} + // 3b. Zvkn & Zvbc -> {Zvknc} + value.set(Feature::zvksc as u32); + value.set(Feature::zvkned as u32); + value.set(Feature::zvknhb as u32); + value.set(Feature::zvkb as u32); + let value = imply_features(value); + assert!(value.test(Feature::zvkn as u32)); + assert!(value.test(Feature::zvknc as u32)); +} diff --git a/library/std_detect/src/detect/os/windows/aarch64.rs b/library/std_detect/src/detect/os/windows/aarch64.rs new file mode 100644 index 00000000000..937f9f26eed --- /dev/null +++ b/library/std_detect/src/detect/os/windows/aarch64.rs @@ -0,0 +1,125 @@ +//! Run-time feature detection for Aarch64 on Windows. + +use crate::detect::{Feature, cache}; + +/// Try to read the features using IsProcessorFeaturePresent. +pub(crate) fn detect_features() -> cache::Initializer { + type DWORD = u32; + type BOOL = i32; + + const FALSE: BOOL = 0; + // The following Microsoft documents isn't updated for aarch64. + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + // These are defined in winnt.h of Windows SDK + const PF_ARM_VFP_32_REGISTERS_AVAILABLE: u32 = 18; + const PF_ARM_NEON_INSTRUCTIONS_AVAILABLE: u32 = 19; + const PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE: u32 = 30; + const PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE: u32 = 31; + const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: u32 = 34; + const PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE: u32 = 43; + const PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE: u32 = 44; + const PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE: u32 = 45; + const PF_ARM_SVE_INSTRUCTIONS_AVAILABLE: u32 = 46; + const PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE: u32 = 47; + const PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE: u32 = 48; + const PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE: u32 = 49; + const PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE: u32 = 50; + const PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE: u32 = 51; + // const PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE: u32 = 52; + // const PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE: u32 = 53; + const PF_ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE: u32 = 54; + const PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE: u32 = 55; + const PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE: u32 = 56; + // const PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE: u32 = 57; + // const PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE: u32 = 58; + // const PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE: u32 = 59; + + unsafe extern "system" { + fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL; + } + + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Some features may be supported on current CPU, + // but no way to detect it by OS API. + // Also, we require unsafe block for the extern "system" calls. + unsafe { + enable_feature( + Feature::fp, + IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::asimd, + IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::crc, + IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::lse, + IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::dotprod, + IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::jsconv, + IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::rcpc, + IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve, + IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2, + IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2p1, + IsProcessorFeaturePresent(PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2_aes, + IsProcessorFeaturePresent(PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE) != FALSE + && IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE) + != FALSE, + ); + enable_feature( + Feature::sve2_bitperm, + IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve_b16b16, + IsProcessorFeaturePresent(PF_ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2_sha3, + IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2_sm4, + IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE means aes, sha1, sha2 and + // pmull support + let crypto = + IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE; + enable_feature(Feature::aes, crypto); + enable_feature(Feature::pmull, crypto); + enable_feature(Feature::sha2, crypto); + } + } + value +} diff --git a/library/std_detect/src/detect/os/x86.rs b/library/std_detect/src/detect/os/x86.rs new file mode 100644 index 00000000000..20f848ab05c --- /dev/null +++ b/library/std_detect/src/detect/os/x86.rs @@ -0,0 +1,316 @@ +//! x86 run-time feature detection is OS independent. + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; +use core::mem; + +use crate::detect::{Feature, bit, cache}; + +/// Run-time feature detection on x86 works by using the CPUID instruction. +/// +/// The [CPUID Wikipedia page][wiki_cpuid] contains +/// all the information about which flags to set to query which values, and in +/// which registers these are reported. +/// +/// The definitive references are: +/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +/// Instruction Set Reference, A-Z][intel64_ref]. +/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +/// System Instructions][amd64_ref]. +/// +/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID +/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +#[allow(clippy::similar_names)] +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + if cfg!(target_env = "sgx") { + // doesn't support this because it is untrusted data + return value; + } + + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum + // leaf value for subsequent calls of `cpuinfo` in range [0, + // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars, + // returned in EBX, EDX, and ECX (in that order): + let (max_basic_leaf, vendor_id) = unsafe { + let CpuidResult { eax: max_basic_leaf, ebx, ecx, edx } = __cpuid(0); + let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()]; + let vendor_id: [u8; 12] = mem::transmute(vendor_id); + (max_basic_leaf, vendor_id) + }; + + if max_basic_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; + } + + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { ecx: proc_info_ecx, edx: proc_info_edx, .. } = + unsafe { __cpuid(0x0000_0001_u32) }; + + // EAX = 7: Queries "Extended Features"; + // Contains information about bmi,bmi2, and avx2 support. + let ( + extended_features_ebx, + extended_features_ecx, + extended_features_edx, + extended_features_eax_leaf_1, + extended_features_edx_leaf_1, + ) = if max_basic_leaf >= 7 { + let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; + let CpuidResult { eax: eax_1, edx: edx_1, .. } = + unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) }; + (ebx, ecx, edx, eax_1, edx_1) + } else { + (0, 0, 0, 0, 0) // CPUID does not support "Extended Features" + }; + + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let CpuidResult { eax: extended_max_basic_leaf, .. } = unsafe { __cpuid(0x8000_0000_u32) }; + + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature + // Bits" + let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + ecx + } else { + 0 + }; + + { + // borrows value till the end of this scope: + let mut enable = |r, rb, f| { + let present = bit::test(r as usize, rb); + if present { + value.set(f as u32); + } + present + }; + + enable(proc_info_ecx, 0, Feature::sse3); + enable(proc_info_ecx, 1, Feature::pclmulqdq); + enable(proc_info_ecx, 9, Feature::ssse3); + enable(proc_info_ecx, 13, Feature::cmpxchg16b); + enable(proc_info_ecx, 19, Feature::sse4_1); + enable(proc_info_ecx, 20, Feature::sse4_2); + enable(proc_info_ecx, 22, Feature::movbe); + enable(proc_info_ecx, 23, Feature::popcnt); + enable(proc_info_ecx, 25, Feature::aes); + let f16c = enable(proc_info_ecx, 29, Feature::f16c); + enable(proc_info_ecx, 30, Feature::rdrand); + enable(extended_features_ebx, 18, Feature::rdseed); + enable(extended_features_ebx, 19, Feature::adx); + enable(extended_features_ebx, 11, Feature::rtm); + enable(proc_info_edx, 4, Feature::tsc); + enable(proc_info_edx, 23, Feature::mmx); + enable(proc_info_edx, 24, Feature::fxsr); + enable(proc_info_edx, 25, Feature::sse); + enable(proc_info_edx, 26, Feature::sse2); + enable(extended_features_ebx, 29, Feature::sha); + + enable(extended_features_ecx, 8, Feature::gfni); + enable(extended_features_ecx, 9, Feature::vaes); + enable(extended_features_ecx, 10, Feature::vpclmulqdq); + + enable(extended_features_ebx, 3, Feature::bmi1); + enable(extended_features_ebx, 8, Feature::bmi2); + + enable(extended_features_ebx, 9, Feature::ermsb); + + enable(extended_features_eax_leaf_1, 31, Feature::movrs); + + // Detect if CPUID.19h available + if bit::test(extended_features_ecx as usize, 23) { + let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) }; + enable(ebx, 0, Feature::kl); + enable(ebx, 2, Feature::widekl); + } + + // `XSAVE` and `AVX` support: + let cpu_xsave = bit::test(proc_info_ecx as usize, 26); + if cpu_xsave { + // 0. Here the CPU supports `XSAVE`. + + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - [intel: is avx enabled?][is_avx_enabled], + // - [mozilla: sse.cpp][mozilla_sse_cpp]. + // + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = bit::test(proc_info_ecx as usize, 27); + + if cpu_osxsave { + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the: + // + // * SSE -> `XCR0.SSE[1]` + // * AVX -> `XCR0.AVX[2]` + // * AVX-512 -> `XCR0.AVX-512[7:5]`. + // * AMX -> `XCR0.AMX[18:17]` + // + // by setting the corresponding bits of `XCR0` to `1`. + // + // This is safe because the CPU supports `xsave` + // and the OS has set `osxsave`. + let xcr0 = unsafe { _xgetbv(0) }; + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: + let os_avx_support = xcr0 & 6 == 6; + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`: + let os_avx512_support = xcr0 & 0xe0 == 0xe0; + // Test `XCR0.AMX[18:17]` with the mask `0b110_0000_0000_0000_0000 == 0x60000` + let os_amx_support = xcr0 & 0x60000 == 0x60000; + + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + if os_avx_support { + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software + // Developer’s Manual, Volume 1: Basic Architecture": + // + // "Software enables the XSAVE feature set by setting + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 + // instruction). If this bit is 0, execution of any of XGETBV, + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV + // causes an invalid-opcode exception (#UD)" + // + enable(proc_info_ecx, 26, Feature::xsave); + + // For `xsaveopt`, `xsavec`, and `xsaves` we need to query: + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, + // ECX = 1): + if max_basic_leaf >= 0xd { + let CpuidResult { eax: proc_extended_state1_eax, .. } = + unsafe { __cpuid_count(0xd_u32, 1) }; + enable(proc_extended_state1_eax, 0, Feature::xsaveopt); + enable(proc_extended_state1_eax, 1, Feature::xsavec); + enable(proc_extended_state1_eax, 3, Feature::xsaves); + } + + // FMA (uses 256-bit wide registers): + let fma = enable(proc_info_ecx, 12, Feature::fma); + + // And AVX/AVX2: + enable(proc_info_ecx, 28, Feature::avx); + enable(extended_features_ebx, 5, Feature::avx2); + + // "Short" versions of AVX512 instructions + enable(extended_features_eax_leaf_1, 4, Feature::avxvnni); + enable(extended_features_eax_leaf_1, 23, Feature::avxifma); + enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8); + enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert); + enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16); + + enable(extended_features_eax_leaf_1, 0, Feature::sha512); + enable(extended_features_eax_leaf_1, 1, Feature::sm3); + enable(extended_features_eax_leaf_1, 2, Feature::sm4); + + // For AVX-512 the OS also needs to support saving/restoring + // the extended state, only then we enable AVX-512 support: + // Also, Rust makes `avx512f` imply `fma` and `f16c`, because + // otherwise the assembler is broken. But Intel doesn't guarantee + // that `fma` and `f16c` are available with `avx512f`, so we + // need to check for them separately. + if os_avx512_support && f16c && fma { + enable(extended_features_ebx, 16, Feature::avx512f); + enable(extended_features_ebx, 17, Feature::avx512dq); + enable(extended_features_ebx, 21, Feature::avx512ifma); + enable(extended_features_ebx, 26, Feature::avx512pf); + enable(extended_features_ebx, 27, Feature::avx512er); + enable(extended_features_ebx, 28, Feature::avx512cd); + enable(extended_features_ebx, 30, Feature::avx512bw); + enable(extended_features_ebx, 31, Feature::avx512vl); + enable(extended_features_ecx, 1, Feature::avx512vbmi); + enable(extended_features_ecx, 6, Feature::avx512vbmi2); + enable(extended_features_ecx, 11, Feature::avx512vnni); + enable(extended_features_ecx, 12, Feature::avx512bitalg); + enable(extended_features_ecx, 14, Feature::avx512vpopcntdq); + enable(extended_features_edx, 8, Feature::avx512vp2intersect); + enable(extended_features_edx, 23, Feature::avx512fp16); + enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16); + } + } + + if os_amx_support { + enable(extended_features_edx, 24, Feature::amx_tile); + enable(extended_features_edx, 25, Feature::amx_int8); + enable(extended_features_edx, 22, Feature::amx_bf16); + enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); + enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); + + if max_basic_leaf >= 0x1e { + let CpuidResult { eax: amx_feature_flags_eax, .. } = + unsafe { __cpuid_count(0x1e_u32, 1) }; + + enable(amx_feature_flags_eax, 4, Feature::amx_fp8); + enable(amx_feature_flags_eax, 5, Feature::amx_transpose); + enable(amx_feature_flags_eax, 6, Feature::amx_tf32); + enable(amx_feature_flags_eax, 7, Feature::amx_avx512); + enable(amx_feature_flags_eax, 8, Feature::amx_movrs); + } + } + } + } + + // This detects ABM on AMD CPUs and LZCNT on Intel CPUs. + // On intel CPUs with popcnt, lzcnt implements the + // "missing part" of ABM, so we map both to the same + // internal feature. + // + // The `is_x86_feature_detected!("lzcnt")` macro then + // internally maps to Feature::abm. + enable(extended_proc_info_ecx, 5, Feature::lzcnt); + + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series + // number(Family 18h). + // + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD + // family 17h. + // + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. + // Related Hygon kernel patch can be found on + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { + // These features are available on AMD arch CPUs: + enable(extended_proc_info_ecx, 6, Feature::sse4a); + enable(extended_proc_info_ecx, 21, Feature::tbm); + enable(extended_proc_info_ecx, 11, Feature::xop); + } + } + + // Unfortunately, some Skylake chips erroneously report support for BMI1 and + // BMI2 without actual support. These chips don't support AVX, and it seems + // that all Intel chips with non-erroneous support BMI do (I didn't check + // other vendors), so we can disable these flags for chips that don't also + // report support for AVX. + // + // It's possible this will pessimize future chips that do support BMI and + // not AVX, but this seems minor compared to a hard crash you get when + // executing an unsupported instruction (to put it another way, it's safe + // for us to under-report CPU features, but not to over-report them). Still, + // to limit any impact this may have in the future, we only do this for + // Intel chips, as it's a bug only present in their chips. + // + // This bug is documented as `SKL052` in the errata section of this document: + // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf + if vendor_id == *b"GenuineIntel" && !value.test(Feature::avx as u32) { + value.unset(Feature::bmi1 as u32); + value.unset(Feature::bmi2 as u32); + } + + value +} diff --git a/library/std_detect/src/detect/test_data/linux-artificial-aarch64.auxv b/library/std_detect/src/detect/test_data/linux-artificial-aarch64.auxv new file mode 100644 index 00000000000..ec826afcf38 --- /dev/null +++ b/library/std_detect/src/detect/test_data/linux-artificial-aarch64.auxv Binary files differdiff --git a/library/std_detect/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv b/library/std_detect/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv new file mode 100644 index 00000000000..95537b73f20 --- /dev/null +++ b/library/std_detect/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv Binary files differdiff --git a/library/std_detect/src/detect/test_data/linux-hwcap2-aarch64.auxv b/library/std_detect/src/detect/test_data/linux-hwcap2-aarch64.auxv new file mode 100644 index 00000000000..1d87264b221 --- /dev/null +++ b/library/std_detect/src/detect/test_data/linux-hwcap2-aarch64.auxv Binary files differdiff --git a/library/std_detect/src/detect/test_data/linux-no-hwcap2-aarch64.auxv b/library/std_detect/src/detect/test_data/linux-no-hwcap2-aarch64.auxv new file mode 100644 index 00000000000..35f01cc767c --- /dev/null +++ b/library/std_detect/src/detect/test_data/linux-no-hwcap2-aarch64.auxv Binary files differdiff --git a/library/std_detect/src/detect/test_data/linux-rpi3.auxv b/library/std_detect/src/detect/test_data/linux-rpi3.auxv new file mode 100644 index 00000000000..0538e661f63 --- /dev/null +++ b/library/std_detect/src/detect/test_data/linux-rpi3.auxv Binary files differdiff --git a/library/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv b/library/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv new file mode 100644 index 00000000000..75abc02d178 --- /dev/null +++ b/library/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv Binary files differdiff --git a/library/std_detect/src/lib.rs b/library/std_detect/src/lib.rs new file mode 100644 index 00000000000..ab1b77bad5b --- /dev/null +++ b/library/std_detect/src/lib.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection for the Rust standard library. +//! +//! To detect whether a feature is enabled in the system running the binary +//! use one of the appropriate macro for the target: +//! +//! * `x86` and `x86_64`: [`is_x86_feature_detected`] +//! * `arm`: [`is_arm_feature_detected`] +//! * `aarch64`: [`is_aarch64_feature_detected`] +//! * `riscv`: [`is_riscv_feature_detected`] +//! * `mips`: [`is_mips_feature_detected`] +//! * `mips64`: [`is_mips64_feature_detected`] +//! * `powerpc`: [`is_powerpc_feature_detected`] +//! * `powerpc64`: [`is_powerpc64_feature_detected`] +//! * `loongarch`: [`is_loongarch_feature_detected`] +//! * `s390x`: [`is_s390x_feature_detected`] + +#![unstable(feature = "stdarch_internal", issue = "none")] +#![feature(staged_api, doc_cfg, allow_internal_unstable)] +#![deny(rust_2018_idioms)] +#![allow(clippy::shadow_reuse)] +#![cfg_attr(test, allow(unused_imports))] +#![no_std] +#![allow(internal_features)] + +#[cfg(test)] +#[macro_use] +extern crate std; + +// rust-lang/rust#83888: removing `extern crate` gives an error that `vec_spare> +#[cfg_attr(feature = "std_detect_file_io", allow(unused_extern_crates))] +#[cfg(feature = "std_detect_file_io")] +extern crate alloc; + +#[doc(hidden)] +#[unstable(feature = "stdarch_internal", issue = "none")] +pub mod detect; diff --git a/library/std_detect/tests/cpu-detection.rs b/library/std_detect/tests/cpu-detection.rs new file mode 100644 index 00000000000..5ad32d83237 --- /dev/null +++ b/library/std_detect/tests/cpu-detection.rs @@ -0,0 +1,334 @@ +#![allow(internal_features)] +#![feature(stdarch_internal)] +#![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))] +#![cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + feature(stdarch_aarch64_feature_detection) +)] +#![cfg_attr( + any(target_arch = "riscv32", target_arch = "riscv64"), + feature(stdarch_riscv_feature_detection) +)] +#![cfg_attr(target_arch = "powerpc", feature(stdarch_powerpc_feature_detection))] +#![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc_feature_detection))] +#![cfg_attr(target_arch = "s390x", feature(stdarch_s390x_feature_detection))] +#![allow(clippy::unwrap_used, clippy::use_debug, clippy::print_stdout)] + +#[cfg_attr( + any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ), + macro_use +)] +#[cfg(any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", +))] +extern crate std_detect; + +#[test] +fn all() { + for (f, e) in std_detect::detect::features() { + println!("{f}: {e}"); + } +} + +#[test] +#[cfg(all(target_arch = "arm", target_os = "freebsd"))] +fn arm_freebsd() { + println!("neon: {}", is_arm_feature_detected!("neon")); + println!("pmull: {}", is_arm_feature_detected!("pmull")); + println!("crc: {}", is_arm_feature_detected!("crc")); + println!("aes: {}", is_arm_feature_detected!("aes")); + println!("sha2: {}", is_arm_feature_detected!("sha2")); +} + +#[test] +#[cfg(all(target_arch = "arm", any(target_os = "linux", target_os = "android")))] +fn arm_linux() { + println!("neon: {}", is_arm_feature_detected!("neon")); + println!("pmull: {}", is_arm_feature_detected!("pmull")); + println!("crc: {}", is_arm_feature_detected!("crc")); + println!("aes: {}", is_arm_feature_detected!("aes")); + println!("sha2: {}", is_arm_feature_detected!("sha2")); + println!("dotprod: {}", is_arm_feature_detected!("dotprod")); + println!("i8mm: {}", is_arm_feature_detected!("i8mm")); +} + +#[test] +#[cfg(all(target_arch = "aarch64", any(target_os = "linux", target_os = "android")))] +fn aarch64_linux() { + println!("asimd: {}", is_aarch64_feature_detected!("asimd")); + println!("neon: {}", is_aarch64_feature_detected!("neon")); + println!("pmull: {}", is_aarch64_feature_detected!("pmull")); + println!("fp: {}", is_aarch64_feature_detected!("fp")); + println!("fp16: {}", is_aarch64_feature_detected!("fp16")); + println!("sve: {}", is_aarch64_feature_detected!("sve")); + println!("crc: {}", is_aarch64_feature_detected!("crc")); + println!("lse: {}", is_aarch64_feature_detected!("lse")); + println!("lse2: {}", is_aarch64_feature_detected!("lse2")); + println!("lse128: {}", is_aarch64_feature_detected!("lse128")); + println!("rdm: {}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {}", is_aarch64_feature_detected!("rcpc")); + println!("rcpc2: {}", is_aarch64_feature_detected!("rcpc2")); + println!("rcpc3: {}", is_aarch64_feature_detected!("rcpc3")); + println!("dotprod: {}", is_aarch64_feature_detected!("dotprod")); + println!("tme: {}", is_aarch64_feature_detected!("tme")); + println!("fhm: {}", is_aarch64_feature_detected!("fhm")); + println!("dit: {}", is_aarch64_feature_detected!("dit")); + println!("flagm: {}", is_aarch64_feature_detected!("flagm")); + println!("flagm2: {}", is_aarch64_feature_detected!("flagm2")); + println!("ssbs: {}", is_aarch64_feature_detected!("ssbs")); + println!("sb: {}", is_aarch64_feature_detected!("sb")); + println!("paca: {}", is_aarch64_feature_detected!("paca")); + println!("pacg: {}", is_aarch64_feature_detected!("pacg")); + // println!("pauth-lr: {}", is_aarch64_feature_detected!("pauth-lr")); + println!("dpb: {}", is_aarch64_feature_detected!("dpb")); + println!("dpb2: {}", is_aarch64_feature_detected!("dpb2")); + println!("sve-b16b16: {}", is_aarch64_feature_detected!("sve-b16b16")); + println!("sve2: {}", is_aarch64_feature_detected!("sve2")); + println!("sve2p1: {}", is_aarch64_feature_detected!("sve2p1")); + println!("sve2-aes: {}", is_aarch64_feature_detected!("sve2-aes")); + println!("sve2-sm4: {}", is_aarch64_feature_detected!("sve2-sm4")); + println!("sve2-sha3: {}", is_aarch64_feature_detected!("sve2-sha3")); + println!("sve2-bitperm: {}", is_aarch64_feature_detected!("sve2-bitperm")); + println!("frintts: {}", is_aarch64_feature_detected!("frintts")); + println!("i8mm: {}", is_aarch64_feature_detected!("i8mm")); + println!("f32mm: {}", is_aarch64_feature_detected!("f32mm")); + println!("f64mm: {}", is_aarch64_feature_detected!("f64mm")); + println!("bf16: {}", is_aarch64_feature_detected!("bf16")); + println!("rand: {}", is_aarch64_feature_detected!("rand")); + println!("bti: {}", is_aarch64_feature_detected!("bti")); + println!("mte: {}", is_aarch64_feature_detected!("mte")); + println!("jsconv: {}", is_aarch64_feature_detected!("jsconv")); + println!("fcma: {}", is_aarch64_feature_detected!("fcma")); + println!("aes: {}", is_aarch64_feature_detected!("aes")); + println!("sha2: {}", is_aarch64_feature_detected!("sha2")); + println!("sha3: {}", is_aarch64_feature_detected!("sha3")); + println!("sm4: {}", is_aarch64_feature_detected!("sm4")); + println!("hbc: {}", is_aarch64_feature_detected!("hbc")); + println!("mops: {}", is_aarch64_feature_detected!("mops")); + println!("ecv: {}", is_aarch64_feature_detected!("ecv")); + println!("cssc: {}", is_aarch64_feature_detected!("cssc")); + println!("fpmr: {}", is_aarch64_feature_detected!("fpmr")); + println!("lut: {}", is_aarch64_feature_detected!("lut")); + println!("faminmax: {}", is_aarch64_feature_detected!("faminmax")); + println!("fp8: {}", is_aarch64_feature_detected!("fp8")); + println!("fp8fma: {}", is_aarch64_feature_detected!("fp8fma")); + println!("fp8dot4: {}", is_aarch64_feature_detected!("fp8dot4")); + println!("fp8dot2: {}", is_aarch64_feature_detected!("fp8dot2")); + println!("wfxt: {}", is_aarch64_feature_detected!("wfxt")); + println!("sme: {}", is_aarch64_feature_detected!("sme")); + println!("sme-b16b16: {}", is_aarch64_feature_detected!("sme-b16b16")); + println!("sme-i16i64: {}", is_aarch64_feature_detected!("sme-i16i64")); + println!("sme-f64f64: {}", is_aarch64_feature_detected!("sme-f64f64")); + println!("sme-fa64: {}", is_aarch64_feature_detected!("sme-fa64")); + println!("sme2: {}", is_aarch64_feature_detected!("sme2")); + println!("sme2p1: {}", is_aarch64_feature_detected!("sme2p1")); + println!("sme-f16f16: {}", is_aarch64_feature_detected!("sme-f16f16")); + println!("sme-lutv2: {}", is_aarch64_feature_detected!("sme-lutv2")); + println!("sme-f8f16: {}", is_aarch64_feature_detected!("sme-f8f16")); + println!("sme-f8f32: {}", is_aarch64_feature_detected!("sme-f8f32")); + println!("ssve-fp8fma: {}", is_aarch64_feature_detected!("ssve-fp8fma")); + println!("ssve-fp8dot4: {}", is_aarch64_feature_detected!("ssve-fp8dot4")); + println!("ssve-fp8dot2: {}", is_aarch64_feature_detected!("ssve-fp8dot2")); +} + +#[test] +#[cfg(all(any(target_arch = "aarch64", target_arch = "arm64ec"), target_os = "windows"))] +fn aarch64_windows() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("jsconv: {:?}", is_aarch64_feature_detected!("jsconv")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); +} + +#[test] +#[cfg(all(target_arch = "aarch64", any(target_os = "freebsd", target_os = "openbsd")))] +fn aarch64_bsd() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("sve: {:?}", is_aarch64_feature_detected!("sve")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("lse2: {:?}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("tme: {:?}", is_aarch64_feature_detected!("tme")); + println!("paca: {:?}", is_aarch64_feature_detected!("paca")); + println!("pacg: {:?}", is_aarch64_feature_detected!("pacg")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); +} + +#[test] +#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))] +fn aarch64_darwin() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("lse2: {:?}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("rcpc2: {:?}", is_aarch64_feature_detected!("rcpc2")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("fhm: {:?}", is_aarch64_feature_detected!("fhm")); + println!("flagm: {:?}", is_aarch64_feature_detected!("flagm")); + println!("ssbs: {:?}", is_aarch64_feature_detected!("ssbs")); + println!("sb: {:?}", is_aarch64_feature_detected!("sb")); + println!("paca: {:?}", is_aarch64_feature_detected!("paca")); + println!("dpb: {:?}", is_aarch64_feature_detected!("dpb")); + println!("dpb2: {:?}", is_aarch64_feature_detected!("dpb2")); + println!("frintts: {:?}", is_aarch64_feature_detected!("frintts")); + println!("i8mm: {:?}", is_aarch64_feature_detected!("i8mm")); + println!("bf16: {:?}", is_aarch64_feature_detected!("bf16")); + println!("bti: {:?}", is_aarch64_feature_detected!("bti")); + println!("fcma: {:?}", is_aarch64_feature_detected!("fcma")); + println!("jsconv: {:?}", is_aarch64_feature_detected!("jsconv")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); + println!("sha3: {:?}", is_aarch64_feature_detected!("sha3")); +} + +#[test] +#[cfg(all( + any(target_arch = "riscv32", target_arch = "riscv64"), + any(target_os = "linux", target_os = "android") +))] +fn riscv_linux() { + println!("rv32i: {}", is_riscv_feature_detected!("rv32i")); + println!("rv32e: {}", is_riscv_feature_detected!("rv32e")); + println!("rv64i: {}", is_riscv_feature_detected!("rv64i")); + println!("rv128i: {}", is_riscv_feature_detected!("rv128i")); + println!("unaligned-scalar-mem: {}", is_riscv_feature_detected!("unaligned-scalar-mem")); + println!("unaligned-vector-mem: {}", is_riscv_feature_detected!("unaligned-vector-mem")); + println!("zicsr: {}", is_riscv_feature_detected!("zicsr")); + println!("zicntr: {}", is_riscv_feature_detected!("zicntr")); + println!("zihpm: {}", is_riscv_feature_detected!("zihpm")); + println!("zifencei: {}", is_riscv_feature_detected!("zifencei")); + println!("zihintntl: {}", is_riscv_feature_detected!("zihintntl")); + println!("zihintpause: {}", is_riscv_feature_detected!("zihintpause")); + println!("zimop: {}", is_riscv_feature_detected!("zimop")); + println!("zicbom: {}", is_riscv_feature_detected!("zicbom")); + println!("zicboz: {}", is_riscv_feature_detected!("zicboz")); + println!("zicond: {}", is_riscv_feature_detected!("zicond")); + println!("m: {}", is_riscv_feature_detected!("m")); + println!("a: {}", is_riscv_feature_detected!("a")); + println!("zalrsc: {}", is_riscv_feature_detected!("zalrsc")); + println!("zaamo: {}", is_riscv_feature_detected!("zaamo")); + println!("zawrs: {}", is_riscv_feature_detected!("zawrs")); + println!("zacas: {}", is_riscv_feature_detected!("zacas")); + println!("zam: {}", is_riscv_feature_detected!("zam")); + println!("ztso: {}", is_riscv_feature_detected!("ztso")); + println!("f: {}", is_riscv_feature_detected!("f")); + println!("d: {}", is_riscv_feature_detected!("d")); + println!("q: {}", is_riscv_feature_detected!("q")); + println!("zfh: {}", is_riscv_feature_detected!("zfh")); + println!("zfhmin: {}", is_riscv_feature_detected!("zfhmin")); + println!("zfa: {}", is_riscv_feature_detected!("zfa")); + println!("zfbfmin: {}", is_riscv_feature_detected!("zfbfmin")); + println!("zfinx: {}", is_riscv_feature_detected!("zfinx")); + println!("zdinx: {}", is_riscv_feature_detected!("zdinx")); + println!("zhinx: {}", is_riscv_feature_detected!("zhinx")); + println!("zhinxmin: {}", is_riscv_feature_detected!("zhinxmin")); + println!("c: {}", is_riscv_feature_detected!("c")); + println!("zca: {}", is_riscv_feature_detected!("zca")); + println!("zcf: {}", is_riscv_feature_detected!("zcf")); + println!("zcd: {}", is_riscv_feature_detected!("zcd")); + println!("zcb: {}", is_riscv_feature_detected!("zcb")); + println!("zcmop: {}", is_riscv_feature_detected!("zcmop")); + println!("b: {}", is_riscv_feature_detected!("b")); + println!("zba: {}", is_riscv_feature_detected!("zba")); + println!("zbb: {}", is_riscv_feature_detected!("zbb")); + println!("zbc: {}", is_riscv_feature_detected!("zbc")); + println!("zbs: {}", is_riscv_feature_detected!("zbs")); + println!("zbkb: {}", is_riscv_feature_detected!("zbkb")); + println!("zbkc: {}", is_riscv_feature_detected!("zbkc")); + println!("zbkx: {}", is_riscv_feature_detected!("zbkx")); + println!("zknd: {}", is_riscv_feature_detected!("zknd")); + println!("zkne: {}", is_riscv_feature_detected!("zkne")); + println!("zknh: {}", is_riscv_feature_detected!("zknh")); + println!("zksed: {}", is_riscv_feature_detected!("zksed")); + println!("zksh: {}", is_riscv_feature_detected!("zksh")); + println!("zkr: {}", is_riscv_feature_detected!("zkr")); + println!("zkn: {}", is_riscv_feature_detected!("zkn")); + println!("zks: {}", is_riscv_feature_detected!("zks")); + println!("zk: {}", is_riscv_feature_detected!("zk")); + println!("zkt: {}", is_riscv_feature_detected!("zkt")); + println!("v: {}", is_riscv_feature_detected!("v")); + println!("zve32x: {}", is_riscv_feature_detected!("zve32x")); + println!("zve32f: {}", is_riscv_feature_detected!("zve32f")); + println!("zve64x: {}", is_riscv_feature_detected!("zve64x")); + println!("zve64f: {}", is_riscv_feature_detected!("zve64f")); + println!("zve64d: {}", is_riscv_feature_detected!("zve64d")); + println!("zvfh: {}", is_riscv_feature_detected!("zvfh")); + println!("zvfhmin: {}", is_riscv_feature_detected!("zvfhmin")); + println!("zvfbfmin: {}", is_riscv_feature_detected!("zvfbfmin")); + println!("zvfbfwma: {}", is_riscv_feature_detected!("zvfbfwma")); + println!("zvbb: {}", is_riscv_feature_detected!("zvbb")); + println!("zvbc: {}", is_riscv_feature_detected!("zvbc")); + println!("zvkb: {}", is_riscv_feature_detected!("zvkb")); + println!("zvkg: {}", is_riscv_feature_detected!("zvkg")); + println!("zvkned: {}", is_riscv_feature_detected!("zvkned")); + println!("zvknha: {}", is_riscv_feature_detected!("zvknha")); + println!("zvknhb: {}", is_riscv_feature_detected!("zvknhb")); + println!("zvksed: {}", is_riscv_feature_detected!("zvksed")); + println!("zvksh: {}", is_riscv_feature_detected!("zvksh")); + println!("zvkn: {}", is_riscv_feature_detected!("zvkn")); + println!("zvknc: {}", is_riscv_feature_detected!("zvknc")); + println!("zvkng: {}", is_riscv_feature_detected!("zvkng")); + println!("zvks: {}", is_riscv_feature_detected!("zvks")); + println!("zvksc: {}", is_riscv_feature_detected!("zvksc")); + println!("zvksg: {}", is_riscv_feature_detected!("zvksg")); + println!("zvkt: {}", is_riscv_feature_detected!("zvkt")); + println!("j: {}", is_riscv_feature_detected!("j")); + println!("p: {}", is_riscv_feature_detected!("p")); +} + +#[test] +#[cfg(all(target_arch = "powerpc", target_os = "linux"))] +fn powerpc_linux() { + println!("altivec: {}", is_powerpc_feature_detected!("altivec")); + println!("vsx: {}", is_powerpc_feature_detected!("vsx")); + println!("power8: {}", is_powerpc_feature_detected!("power8")); +} + +#[test] +#[cfg(all(target_arch = "powerpc64", any(target_os = "linux", target_os = "freebsd"),))] +fn powerpc64_linux_or_freebsd() { + println!("altivec: {}", is_powerpc64_feature_detected!("altivec")); + println!("vsx: {}", is_powerpc64_feature_detected!("vsx")); + println!("power8: {}", is_powerpc64_feature_detected!("power8")); + println!("power9: {}", is_powerpc64_feature_detected!("power9")); +} + +#[test] +#[cfg(all(target_arch = "s390x", target_os = "linux",))] +fn s390x_linux() { + println!("vector: {}", is_s390x_feature_detected!("vector")); +} diff --git a/library/std_detect/tests/macro_trailing_commas.rs b/library/std_detect/tests/macro_trailing_commas.rs new file mode 100644 index 00000000000..2fee0abdd57 --- /dev/null +++ b/library/std_detect/tests/macro_trailing_commas.rs @@ -0,0 +1,109 @@ +#![allow(internal_features)] +#![cfg_attr( + any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch32", + target_arch = "loongarch64" + ), + feature(stdarch_internal) +)] +#![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))] +#![cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + feature(stdarch_aarch64_feature_detection) +)] +#![cfg_attr( + any(target_arch = "powerpc", target_arch = "powerpc64"), + feature(stdarch_powerpc_feature_detection) +)] +#![cfg_attr(target_arch = "s390x", feature(stdarch_s390x_feature_detection))] +#![cfg_attr( + any(target_arch = "riscv32", target_arch = "riscv64"), + feature(stdarch_riscv_feature_detection) +)] +#![cfg_attr( + any(target_arch = "loongarch32", target_arch = "loongarch64"), + feature(stdarch_loongarch_feature_detection) +)] + +#[cfg(any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch32", + target_arch = "loongarch64" +))] +#[macro_use] +extern crate std_detect; + +#[test] +#[cfg(target_arch = "arm")] +fn arm() { + let _ = is_arm_feature_detected!("neon"); + let _ = is_arm_feature_detected!("neon",); +} + +#[test] +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +fn aarch64() { + let _ = is_aarch64_feature_detected!("fp"); + let _ = is_aarch64_feature_detected!("fp",); +} + +#[test] +#[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] +fn loongarch() { + let _ = is_loongarch_feature_detected!("lsx"); + let _ = is_loongarch_feature_detected!("lsx",); +} + +#[test] +#[cfg(target_arch = "powerpc")] +fn powerpc() { + let _ = is_powerpc_feature_detected!("altivec"); + let _ = is_powerpc_feature_detected!("altivec",); +} + +#[test] +#[cfg(target_arch = "powerpc64")] +fn powerpc64() { + let _ = is_powerpc64_feature_detected!("altivec"); + let _ = is_powerpc64_feature_detected!("altivec",); +} + +#[test] +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +fn riscv() { + let _ = is_riscv_feature_detected!("zk"); + let _ = is_riscv_feature_detected!("zk",); +} + +#[test] +#[cfg(target_arch = "s390x")] +fn s390x() { + let _ = is_s390x_feature_detected!("vector"); + let _ = is_s390x_feature_detected!("vector",); +} + +#[test] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn x86() { + let _ = is_x86_feature_detected!("sse"); + let _ = is_x86_feature_detected!("sse",); +} diff --git a/library/std_detect/tests/x86-specific.rs b/library/std_detect/tests/x86-specific.rs new file mode 100644 index 00000000000..2ed2bb2a99e --- /dev/null +++ b/library/std_detect/tests/x86-specific.rs @@ -0,0 +1,91 @@ +#![cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#![allow(internal_features)] +#![feature(stdarch_internal, x86_amx_intrinsics, xop_target_feature, movrs_target_feature)] + +#[macro_use] +extern crate std_detect; + +#[test] +fn dump() { + println!("aes: {:?}", is_x86_feature_detected!("aes")); + println!("pclmulqdq: {:?}", is_x86_feature_detected!("pclmulqdq")); + println!("rdrand: {:?}", is_x86_feature_detected!("rdrand")); + println!("rdseed: {:?}", is_x86_feature_detected!("rdseed")); + println!("tsc: {:?}", is_x86_feature_detected!("tsc")); + println!("sse: {:?}", is_x86_feature_detected!("sse")); + println!("sse2: {:?}", is_x86_feature_detected!("sse2")); + println!("sse3: {:?}", is_x86_feature_detected!("sse3")); + println!("ssse3: {:?}", is_x86_feature_detected!("ssse3")); + println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1")); + println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2")); + println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); + println!("sha: {:?}", is_x86_feature_detected!("sha")); + println!("f16c: {:?}", is_x86_feature_detected!("f16c")); + println!("avx: {:?}", is_x86_feature_detected!("avx")); + println!("avx2: {:?}", is_x86_feature_detected!("avx2")); + println!("sha512: {:?}", is_x86_feature_detected!("sha512")); + println!("sm3: {:?}", is_x86_feature_detected!("sm3")); + println!("sm4: {:?}", is_x86_feature_detected!("sm4")); + println!("avx512f: {:?}", is_x86_feature_detected!("avx512f")); + println!("avx512cd: {:?}", is_x86_feature_detected!("avx512cd")); + println!("avx512er: {:?}", is_x86_feature_detected!("avx512er")); + println!("avx512pf: {:?}", is_x86_feature_detected!("avx512pf")); + println!("avx512bw: {:?}", is_x86_feature_detected!("avx512bw")); + println!("avx512dq: {:?}", is_x86_feature_detected!("avx512dq")); + println!("avx512vl: {:?}", is_x86_feature_detected!("avx512vl")); + println!("avx512_ifma: {:?}", is_x86_feature_detected!("avx512ifma")); + println!("avx512vbmi {:?}", is_x86_feature_detected!("avx512vbmi")); + println!("avx512_vpopcntdq: {:?}", is_x86_feature_detected!("avx512vpopcntdq")); + println!("avx512vbmi2: {:?}", is_x86_feature_detected!("avx512vbmi2")); + println!("gfni: {:?}", is_x86_feature_detected!("gfni")); + println!("vaes: {:?}", is_x86_feature_detected!("vaes")); + println!("vpclmulqdq: {:?}", is_x86_feature_detected!("vpclmulqdq")); + println!("avx512vnni: {:?}", is_x86_feature_detected!("avx512vnni")); + println!("avx512bitalg: {:?}", is_x86_feature_detected!("avx512bitalg")); + println!("avx512bf16: {:?}", is_x86_feature_detected!("avx512bf16")); + println!("avx512vp2intersect: {:?}", is_x86_feature_detected!("avx512vp2intersect")); + println!("avx512fp16: {:?}", is_x86_feature_detected!("avx512fp16")); + println!("fma: {:?}", is_x86_feature_detected!("fma")); + println!("abm: {:?}", is_x86_feature_detected!("abm")); + println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); + println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); + println!("tbm: {:?}", is_x86_feature_detected!("tbm")); + println!("popcnt: {:?}", is_x86_feature_detected!("popcnt")); + println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); + println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); + println!("xsave: {:?}", is_x86_feature_detected!("xsave")); + println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt")); + println!("xsaves: {:?}", is_x86_feature_detected!("xsaves")); + println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); + println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b")); + println!("adx: {:?}", is_x86_feature_detected!("adx")); + println!("rtm: {:?}", is_x86_feature_detected!("rtm")); + println!("movbe: {:?}", is_x86_feature_detected!("movbe")); + println!("avxvnni: {:?}", is_x86_feature_detected!("avxvnni")); + println!("avxvnniint8: {:?}", is_x86_feature_detected!("avxvnniint8")); + println!("avxneconvert: {:?}", is_x86_feature_detected!("avxneconvert")); + println!("avxifma: {:?}", is_x86_feature_detected!("avxifma")); + println!("avxvnniint16: {:?}", is_x86_feature_detected!("avxvnniint16")); + println!("amx-bf16: {:?}", is_x86_feature_detected!("amx-bf16")); + println!("amx-tile: {:?}", is_x86_feature_detected!("amx-tile")); + println!("amx-int8: {:?}", is_x86_feature_detected!("amx-int8")); + println!("amx-fp16: {:?}", is_x86_feature_detected!("amx-fp16")); + println!("amx-complex: {:?}", is_x86_feature_detected!("amx-complex")); + println!("xop: {:?}", is_x86_feature_detected!("xop")); + println!("kl: {:?}", is_x86_feature_detected!("kl")); + println!("widekl: {:?}", is_x86_feature_detected!("widekl")); + println!("movrs: {:?}", is_x86_feature_detected!("movrs")); + println!("amx-fp8: {:?}", is_x86_feature_detected!("amx-fp8")); + println!("amx-transpose: {:?}", is_x86_feature_detected!("amx-transpose")); + println!("amx-tf32: {:?}", is_x86_feature_detected!("amx-tf32")); + println!("amx-avx512: {:?}", is_x86_feature_detected!("amx-avx512")); + println!("amx-movrs: {:?}", is_x86_feature_detected!("amx-movrs")); +} + +#[test] +#[allow(deprecated)] +fn x86_deprecated() { + println!("avx512gfni {:?}", is_x86_feature_detected!("avx512gfni")); + println!("avx512vaes {:?}", is_x86_feature_detected!("avx512vaes")); + println!("avx512vpclmulqdq {:?}", is_x86_feature_detected!("avx512vpclmulqdq")); +} |
