diff options
| author | bors <bors@rust-lang.org> | 2025-06-26 06:18:35 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-06-26 06:18:35 +0000 |
| commit | 1e838527f18cd24c81547ce6fbef6815032a80a7 (patch) | |
| tree | 4e6494ae0237d8a9f1e16c665025a6395c8df4b4 /library/stdarch/crates/std_detect/src | |
| parent | bc4376fa73b636eb6f2c7d48b1f731d70f022c4b (diff) | |
| parent | e824005a4f30894f65e2589188d4ad36183f22fb (diff) | |
| download | rust-1e838527f18cd24c81547ce6fbef6815032a80a7.tar.gz rust-1e838527f18cd24c81547ce6fbef6815032a80a7.zip | |
Auto merge of #141899 - Kobzol:stdarch-josh, r=Amanieu
Turn `stdarch` into a Josh subtree. In a similar vein to https://github.com/rust-lang/rust/pull/141229, this PR makes the `stdarch` repository a Josh subtree (it was previously a submodule). The initial commit of `stdarch` upon which this is based is `5a7342fc16b208b1b16624e886937ed8509a6506`, which is the previous commit SHA of the `stdarch` submodule. The sync was performed according to https://hackmd.io/7pOuxnkdQDaL1Y1FQr65xg. This was decided in https://github.com/rust-lang/stdarch/issues/1655. Test pull PR on my fork: https://github.com/Kobzol/stdarch/pull/1 Test push PR on my fork: https://github.com/Kobzol/rust/pull/59 I plan to use the same Rust (miri-inspired) tooling that we use for `rustc-dev-guide` to enable pulls/pushes on stdarch. Note that this repository currently doesn't have any stdarch-specific tests, so until it does, the subtree should be modified through this repository only when dealing with changes that contain "cyclical dependencies" between stdarch and rustc. The long-term vision is to integrate stdarch into rust-lang/rust completely. CC `@Amanieu` try-job: aarch64-apple try-job: aarch64-gnu try-job: `x86_64-msvc-*` try-job: x86_64-gnu try-job: x86_64-gnu-aux
Diffstat (limited to 'library/stdarch/crates/std_detect/src')
43 files changed, 4484 insertions, 0 deletions
diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs new file mode 100644 index 00000000000..13570a25c1c --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -0,0 +1,259 @@ +//! Aarch64 run-time features. + +features! { + @TARGET: aarch64; + @CFG: any(target_arch = "aarch64", target_arch = "arm64ec"); + @MACRO_NAME: is_aarch64_feature_detected; + @MACRO_ATTRS: + /// This macro tests, at runtime, whether an `aarch64` feature is enabled on aarch64 platforms. + /// Currently most features are only supported on linux-based platforms. + /// + /// This macro takes one argument which is a string literal of the feature being tested for. + /// The feature names are mostly taken from their FEAT_* definitions in the [ARM Architecture + /// Reference Manual][docs]. + /// + /// ## Supported arguments + /// + /// * `"aes"` - FEAT_AES & FEAT_PMULL + /// * `"asimd"` or "neon" - FEAT_AdvSIMD + /// * `"bf16"` - FEAT_BF16 + /// * `"bti"` - FEAT_BTI + /// * `"crc"` - FEAT_CRC + /// * `"cssc"` - FEAT_CSSC + /// * `"dit"` - FEAT_DIT + /// * `"dotprod"` - FEAT_DotProd + /// * `"dpb"` - FEAT_DPB + /// * `"dpb2"` - FEAT_DPB2 + /// * `"ecv"` - FEAT_ECV + /// * `"f32mm"` - FEAT_F32MM + /// * `"f64mm"` - FEAT_F64MM + /// * `"faminmax"` - FEAT_FAMINMAX + /// * `"fcma"` - FEAT_FCMA + /// * `"fhm"` - FEAT_FHM + /// * `"flagm"` - FEAT_FLAGM + /// * `"flagm2"` - FEAT_FLAGM2 + /// * `"fp"` - FEAT_FP + /// * `"fp16"` - FEAT_FP16 + /// * `"fp8"` - FEAT_FP8 + /// * `"fp8dot2"` - FEAT_FP8DOT2 + /// * `"fp8dot4"` - FEAT_FP8DOT4 + /// * `"fp8fma"` - FEAT_FP8FMA + /// * `"fpmr"` - FEAT_FPMR + /// * `"frintts"` - FEAT_FRINTTS + /// * `"hbc"` - FEAT_HBC + /// * `"i8mm"` - FEAT_I8MM + /// * `"jsconv"` - FEAT_JSCVT + /// * `"lse"` - FEAT_LSE + /// * `"lse128"` - FEAT_LSE128 + /// * `"lse2"` - FEAT_LSE2 + /// * `"lut"` - FEAT_LUT + /// * `"mops"` - FEAT_MOPS + /// * `"mte"` - FEAT_MTE & FEAT_MTE2 
+ /// * `"paca"` - FEAT_PAuth (address authentication) + /// * `"pacg"` - FEAT_Pauth (generic authentication) + /// * `"pauth-lr"` - FEAT_PAuth_LR + /// * `"pmull"` - FEAT_PMULL + /// * `"rand"` - FEAT_RNG + /// * `"rcpc"` - FEAT_LRCPC + /// * `"rcpc2"` - FEAT_LRCPC2 + /// * `"rcpc3"` - FEAT_LRCPC3 + /// * `"rdm"` - FEAT_RDM + /// * `"sb"` - FEAT_SB + /// * `"sha2"` - FEAT_SHA1 & FEAT_SHA256 + /// * `"sha3"` - FEAT_SHA512 & FEAT_SHA3 + /// * `"sm4"` - FEAT_SM3 & FEAT_SM4 + /// * `"sme"` - FEAT_SME + /// * `"sme-b16b16"` - FEAT_SME_B16B16 + /// * `"sme-f16f16"` - FEAT_SME_F16F16 + /// * `"sme-f64f64"` - FEAT_SME_F64F64 + /// * `"sme-f8f16"` - FEAT_SME_F8F16 + /// * `"sme-f8f32"` - FEAT_SME_F8F32 + /// * `"sme-fa64"` - FEAT_SME_FA64 + /// * `"sme-i16i64"` - FEAT_SME_I16I64 + /// * `"sme-lutv2"` - FEAT_SME_LUTv2 + /// * `"sme2"` - FEAT_SME2 + /// * `"sme2p1"` - FEAT_SME2p1 + /// * `"ssbs"` - FEAT_SSBS & FEAT_SSBS2 + /// * `"ssve-fp8dot2"` - FEAT_SSVE_FP8DOT2 + /// * `"ssve-fp8dot4"` - FEAT_SSVE_FP8DOT4 + /// * `"ssve-fp8fma"` - FEAT_SSVE_FP8FMA + /// * `"sve"` - FEAT_SVE + /// * `"sve-b16b16"` - FEAT_SVE_B16B16 (SVE or SME Z-targeting instructions) + /// * `"sve2"` - FEAT_SVE2 + /// * `"sve2-aes"` - FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) + /// * `"sve2-bitperm"` - FEAT_SVE2_BitPerm + /// * `"sve2-sha3"` - FEAT_SVE2_SHA3 + /// * `"sve2-sm4"` - FEAT_SVE2_SM4 + /// * `"sve2p1"` - FEAT_SVE2p1 + /// * `"tme"` - FEAT_TME + /// * `"wfxt"` - FEAT_WFxT + /// + /// [docs]: https://developer.arm.com/documentation/ddi0487/latest + #[stable(feature = "simd_aarch64", since = "1.60.0")] + @BIND_FEATURE_NAME: "asimd"; "neon"; + @NO_RUNTIME_DETECTION: "ras"; + @NO_RUNTIME_DETECTION: "v8.1a"; + @NO_RUNTIME_DETECTION: "v8.2a"; + @NO_RUNTIME_DETECTION: "v8.3a"; + @NO_RUNTIME_DETECTION: "v8.4a"; + @NO_RUNTIME_DETECTION: "v8.5a"; + @NO_RUNTIME_DETECTION: "v8.6a"; + @NO_RUNTIME_DETECTION: "v8.7a"; + @NO_RUNTIME_DETECTION: "v8.8a"; + @NO_RUNTIME_DETECTION: "v8.9a"; + 
@NO_RUNTIME_DETECTION: "v9.1a"; + @NO_RUNTIME_DETECTION: "v9.2a"; + @NO_RUNTIME_DETECTION: "v9.3a"; + @NO_RUNTIME_DETECTION: "v9.4a"; + @NO_RUNTIME_DETECTION: "v9.5a"; + @NO_RUNTIME_DETECTION: "v9a"; + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] asimd: "neon"; + /// FEAT_AdvSIMD (Advanced SIMD/NEON) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pmull: "pmull"; + implied by target_features: ["aes"]; + /// FEAT_PMULL (Polynomial Multiply) - Implied by `aes` target_feature + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp: "fp"; + implied by target_features: ["neon"]; + /// FEAT_FP (Floating point support) - Implied by `neon` target_feature + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] aes: "aes"; + /// FEAT_AES (AES SIMD instructions) & FEAT_PMULL (PMULL{2}, 64-bit operand variants) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bf16: "bf16"; + /// FEAT_BF16 (BFloat16 type, plus MM instructions, plus ASIMD support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bti: "bti"; + /// FEAT_BTI (Branch Target Identification) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] crc: "crc"; + /// FEAT_CRC32 (Cyclic Redundancy Check) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] cssc: "cssc"; + /// FEAT_CSSC (Common Short Sequence Compression instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dit: "dit"; + /// FEAT_DIT (Data Independent Timing instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb: "dpb"; + /// FEAT_DPB (aka dcpop - data cache clean to point of persistence) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb2: "dpb2"; + /// FEAT_DPB2 (aka dcpodp - data cache clean to point of deep persistence) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dotprod: "dotprod"; + /// FEAT_DotProd (Vector Dot-Product 
- ASIMDDP) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ecv: "ecv"; + /// FEAT_ECV (Enhanced Counter Virtualization) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f32mm: "f32mm"; + /// FEAT_F32MM (single-precision matrix multiplication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f64mm: "f64mm"; + /// FEAT_F64MM (double-precision matrix multiplication) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] faminmax: "faminmax"; + /// FEAT_FAMINMAX (FAMIN and FAMAX SIMD/SVE/SME instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fcma: "fcma"; + /// FEAT_FCMA (float complex number operations) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fhm: "fhm"; + /// FEAT_FHM (fp16 multiplication instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] flagm: "flagm"; + /// FEAT_FLAGM (flag manipulation instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] flagm2: "flagm2"; + /// FEAT_FLAGM2 (flag manipulation instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp16: "fp16"; + /// FEAT_FP16 (Half-float support) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8: "fp8"; + /// FEAT_FP8 (F8CVT Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot2: "fp8dot2"; + /// FEAT_FP8DOT2 (F8DP2 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot4: "fp8dot4"; + /// FEAT_FP8DOT4 (F8DP4 Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8fma: "fp8fma"; + /// FEAT_FP8FMA (F8FMA Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fpmr: "fpmr"; + without cfg check: true; + /// 
FEAT_FPMR (Special-purpose AArch64-FPMR register) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] frintts: "frintts"; + /// FEAT_FRINTTS (float to integer rounding instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] hbc: "hbc"; + /// FEAT_HBC (Hinted conditional branches) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] i8mm: "i8mm"; + /// FEAT_I8MM (integer matrix multiplication, plus ASIMD support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] jsconv: "jsconv"; + /// FEAT_JSCVT (JavaScript float conversion instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse: "lse"; + /// FEAT_LSE (Large System Extension - atomics) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lse128: "lse128"; + /// FEAT_LSE128 (128-bit atomics) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse2: "lse2"; + /// FEAT_LSE2 (unaligned and register-pair atomics) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lut: "lut"; + /// FEAT_LUT (Lookup Table Instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] mops: "mops"; + /// FEAT_MOPS (Standardization of memory operations) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] mte: "mte"; + /// FEAT_MTE & FEAT_MTE2 (Memory Tagging Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] paca: "paca"; + /// FEAT_PAuth (address authentication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pacg: "pacg"; + /// FEAT_PAuth (generic authentication) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] pauth_lr: "pauth-lr"; + /// FEAT_PAuth_LR + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rand: "rand"; + /// FEAT_RNG (Random Number Generator) + @FEATURE: #[stable(feature = 
"simd_aarch64", since = "1.60.0")] rcpc: "rcpc"; + /// FEAT_LRCPC (Release consistent Processor consistent) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc2: "rcpc2"; + /// FEAT_LRCPC2 (RCPC with immediate offsets) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] rcpc3: "rcpc3"; + /// FEAT_LRCPC3 (RCPC Instructions v3) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rdm: "rdm"; + /// FEAT_RDM (Rounding Doubling Multiply - ASIMDRDM) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sb: "sb"; + /// FEAT_SB (speculation barrier) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha2: "sha2"; + /// FEAT_SHA1 & FEAT_SHA256 (SHA1 & SHA2-256 instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha3: "sha3"; + /// FEAT_SHA512 & FEAT_SHA3 (SHA2-512 & SHA3 instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sm4: "sm4"; + /// FEAT_SM3 & FEAT_SM4 (SM3 & SM4 instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme: "sme"; + /// FEAT_SME (Scalable Matrix Extension) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2: "sme2"; + /// FEAT_SME2 (SME Version 2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2p1: "sme2p1"; + /// FEAT_SME2p1 (SME Version 2.1) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_b16b16: "sme-b16b16"; + /// FEAT_SME_B16B16 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f16f16: "sme-f16f16"; + /// FEAT_SME_F16F16 (Non-widening half-precision FP16 to FP16 arithmetic for SME2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f64f64: "sme-f64f64"; + /// FEAT_SME_F64F64 (Double-precision floating-point outer product instructions) + 
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f16: "sme-f8f16"; + /// FEAT_SME_F8F16 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f32: "sme-f8f32"; + /// FEAT_SME_F8F32 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_fa64: "sme-fa64"; + /// FEAT_SME_FA64 (Full A64 instruction set support in Streaming SVE mode) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_i16i64: "sme-i16i64"; + /// FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_lutv2: "sme-lutv2"; + /// FEAT_SME_LUTv2 (LUTI4 Instruction) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] ssbs: "ssbs"; + /// FEAT_SSBS & FEAT_SSBS2 (speculative store bypass safe) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot2: "ssve-fp8dot2"; + /// FEAT_SSVE_FP8DOT2 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot4: "ssve-fp8dot4"; + /// FEAT_SSVE_FP8DOT4 + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8fma: "ssve-fp8fma"; + /// FEAT_SSVE_FP8FMA + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve: "sve"; + /// FEAT_SVE (Scalable Vector Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2: "sve2"; + /// FEAT_SVE2 (Scalable Vector Extension 2) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve2p1: "sve2p1"; + /// FEAT_SVE2p1 (Scalable Vector Extension 2.1) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_aes: "sve2-aes"; + /// FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue 
= "127764")] sve_b16b16: "sve-b16b16"; + /// FEAT_SVE_B16B16 (SVE or SME Z-targeting instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_bitperm: "sve2-bitperm"; + /// FEAT_SVE_BitPerm (SVE2 bit permutation instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sha3: "sve2-sha3"; + /// FEAT_SVE_SHA3 (SVE2 SHA3 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sm4: "sve2-sm4"; + /// FEAT_SVE_SM4 (SVE2 SM4 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] tme: "tme"; + /// FEAT_TME (Transactional Memory Extensions) + @FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt"; + /// FEAT_WFxT (WFET and WFIT Instructions) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs new file mode 100644 index 00000000000..c3c8883ce31 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -0,0 +1,29 @@ +//! Run-time feature detection on ARM Aarch32. + +features! { + @TARGET: arm; + @CFG: target_arch = "arm"; + @MACRO_NAME: is_arm_feature_detected; + @MACRO_ATTRS: + /// Checks if `arm` feature is enabled. 
+ #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] + @NO_RUNTIME_DETECTION: "v7"; + @NO_RUNTIME_DETECTION: "vfp2"; + @NO_RUNTIME_DETECTION: "vfp3"; + @NO_RUNTIME_DETECTION: "vfp4"; + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] neon: "neon"; + /// ARM Advanced SIMD (NEON) - Aarch32 + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] pmull: "pmull"; + without cfg check: true; + /// Polynomial Multiply + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] crc: "crc"; + /// CRC32 (Cyclic Redundancy Check) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] aes: "aes"; + /// FEAT_AES (AES instructions) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] sha2: "sha2"; + /// FEAT_SHA1 & FEAT_SHA256 (SHA1 & SHA2-256 instructions) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] i8mm: "i8mm"; + /// FEAT_I8MM (integer matrix multiplication, plus ASIMD support) + @FEATURE: #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] dotprod: "dotprod"; + /// FEAT_DotProd (Vector Dot-Product - ASIMDDP) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/loongarch.rs b/library/stdarch/crates/std_detect/src/detect/arch/loongarch.rs new file mode 100644 index 00000000000..e9d68f6a9bf --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/loongarch.rs @@ -0,0 +1,51 @@ +//! Run-time feature detection on LoongArch. + +features! { + @TARGET: loongarch; + @CFG: target_arch = "loongarch64"; + @MACRO_NAME: is_loongarch_feature_detected; + @MACRO_ATTRS: + /// Checks if `loongarch` feature is enabled. 
+ /// Supported arguments are: + /// + /// * `"f"` + /// * `"d"` + /// * `"frecipe"` + /// * `"div32"` + /// * `"lsx"` + /// * `"lasx"` + /// * `"lam-bh"` + /// * `"lamcas"` + /// * `"ld-seq-sa"` + /// * `"scq"` + /// * `"lbt"` + /// * `"lvz"` + /// * `"ual"` + #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] f: "f"; + /// F + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] d: "d"; + /// D + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] frecipe: "frecipe"; + /// Frecipe + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] div32: "div32"; + /// Div32 + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lsx: "lsx"; + /// LSX + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lasx: "lasx"; + /// LASX + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] lam_bh: "lam-bh"; + /// LAM-BH + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] lamcas: "lamcas"; + /// LAM-CAS + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] ld_seq_sa: "ld-seq-sa"; + /// LD-SEQ-SA + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] scq: "scq"; + /// SCQ + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lbt: "lbt"; + /// LBT + @FEATURE: #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] lvz: "lvz"; + /// LVZ + @FEATURE: #[unstable(feature = "stdarch_loongarch_feature_detection", issue = "117425")] ual: "ual"; + /// UAL +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs new file mode 100644 index 00000000000..e185fdfcaac --- /dev/null +++ 
b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -0,0 +1,12 @@ +//! Run-time feature detection on MIPS. + +features! { + @TARGET: mips; + @CFG: target_arch = "mips"; + @MACRO_NAME: is_mips_feature_detected; + @MACRO_ATTRS: + /// Checks if `mips` feature is enabled. + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + @FEATURE: #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] msa: "msa"; + /// MIPS SIMD Architecture (MSA) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs new file mode 100644 index 00000000000..69fe4869d30 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -0,0 +1,12 @@ +//! Run-time feature detection on MIPS64. + +features! { + @TARGET: mips64; + @CFG: target_arch = "mips64"; + @MACRO_NAME: is_mips64_feature_detected; + @MACRO_ATTRS: + /// Checks if `mips64` feature is enabled. + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + @FEATURE: #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] msa: "msa"; + /// MIPS SIMD Architecture (MSA) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mod.rs b/library/stdarch/crates/std_detect/src/detect/arch/mod.rs new file mode 100644 index 00000000000..d5a13acc028 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mod.rs @@ -0,0 +1,75 @@ +#![allow(dead_code)] + +use cfg_if::cfg_if; + +// Export the macros for all supported architectures. +#[macro_use] +mod x86; +#[macro_use] +mod arm; +#[macro_use] +mod aarch64; +#[macro_use] +mod riscv; +#[macro_use] +mod powerpc; +#[macro_use] +mod powerpc64; +#[macro_use] +mod mips; +#[macro_use] +mod mips64; +#[macro_use] +mod loongarch; +#[macro_use] +mod s390x; + +cfg_if! 
{ + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[stable(feature = "simd_x86", since = "1.27.0")] + pub use x86::*; + } else if #[cfg(target_arch = "arm")] { + #[unstable(feature = "stdarch_arm_feature_detection", issue = "111190")] + pub use arm::*; + } else if #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { + #[stable(feature = "simd_aarch64", since = "1.60.0")] + pub use aarch64::*; + } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] { + #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] + pub use riscv::*; + } else if #[cfg(target_arch = "powerpc")] { + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + pub use powerpc::*; + } else if #[cfg(target_arch = "powerpc64")] { + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + pub use powerpc64::*; + } else if #[cfg(target_arch = "mips")] { + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + pub use mips::*; + } else if #[cfg(target_arch = "mips64")] { + #[unstable(feature = "stdarch_mips_feature_detection", issue = "111188")] + pub use mips64::*; + } else if #[cfg(target_arch = "loongarch64")] { + #[stable(feature = "stdarch_loongarch_feature", since = "1.89.0")] + pub use loongarch::*; + } else if #[cfg(target_arch = "s390x")] { + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + pub use s390x::*; + } else { + // Unimplemented architecture: + #[doc(hidden)] + pub(crate) enum Feature { + Null + } + #[doc(hidden)] + #[unstable(feature = "stdarch_internal", issue = "none")] + pub mod __is_feature_detected {} + + impl Feature { + #[doc(hidden)] + pub(crate) fn from_str(_s: &str) -> Result<Feature, ()> { Err(()) } + #[doc(hidden)] + pub(crate) fn to_str(self) -> &'static str { "" } + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs new 
file mode 100644 index 00000000000..c390993a48a --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -0,0 +1,30 @@ +//! Run-time feature detection on PowerPC. + +features! { + @TARGET: powerpc; + @CFG: target_arch = "powerpc"; + @MACRO_NAME: is_powerpc_feature_detected; + @MACRO_ATTRS: + /// Checks if `powerpc` feature is enabled. + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] altivec: "altivec"; + /// Altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] vsx: "vsx"; + /// VSX + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8: "power8"; + without cfg check: true; + /// Power8 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_altivec: "power8-altivec"; + /// Power8 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_vector: "power8-vector"; + /// Power8 vector + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_crypto: "power8-crypto"; + /// Power8 crypto + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9: "power9"; + without cfg check: true; + /// Power9 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_altivec: "power9-altivec"; + /// Power9 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_vector: "power9-vector"; + /// Power9 vector +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs new file mode 100644 index 00000000000..cf05baa6f79 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -0,0 +1,30 @@ +//! 
Run-time feature detection on PowerPC64. + +features! { + @TARGET: powerpc64; + @CFG: target_arch = "powerpc64"; + @MACRO_NAME: is_powerpc64_feature_detected; + @MACRO_ATTRS: + /// Checks if `powerpc` feature is enabled. + #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] altivec: "altivec"; + /// Altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] vsx: "vsx"; + /// VSX + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8: "power8"; + without cfg check: true; + /// Power8 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_altivec: "power8-altivec"; + /// Power8 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_vector: "power8-vector"; + /// Power8 vector + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power8_crypto: "power8-crypto"; + /// Power8 crypto + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9: "power9"; + without cfg check: true; + /// Power9 + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_altivec: "power9-altivec"; + /// Power9 altivec + @FEATURE: #[unstable(feature = "stdarch_powerpc_feature_detection", issue = "111191")] power9_vector: "power9-vector"; + /// Power9 vector +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/riscv.rs b/library/stdarch/crates/std_detect/src/detect/arch/riscv.rs new file mode 100644 index 00000000000..b86190d7bbf --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/riscv.rs @@ -0,0 +1,344 @@ +//! Run-time feature detection on RISC-V. + +features! 
{ + @TARGET: riscv; + @CFG: any(target_arch = "riscv32", target_arch = "riscv64"); + @MACRO_NAME: is_riscv_feature_detected; + @MACRO_ATTRS: + /// A macro to test at *runtime* whether instruction sets are available on + /// RISC-V platforms. + /// + /// RISC-V standard defined the base sets and the extension sets. + /// The base sets are RV32I, RV64I, RV32E or RV128I. Any RISC-V platform + /// must support one base set and/or multiple extension sets. + /// + /// Any RISC-V standard instruction sets can be in state of either ratified, + /// frozen or draft. The version and status of current standard instruction + /// sets can be checked out from preface section of the [ISA manual]. + /// + /// Platform may define and support their own custom instruction sets with + /// ISA prefix X. These sets are highly platform specific and should be + /// detected with their own platform support crates. + /// + /// [ISA manual]: https://riscv.org/specifications/ratified/ + /// + /// # Platform-specific/agnostic Behavior and Availability + /// + /// Runtime detection depends on the platform-specific feature detection + /// facility and its availability per feature is + /// highly platform/version-specific. + /// + /// Still, a best-effort attempt is performed to enable subset/dependent + /// features if a superset feature is enabled regardless of the platform. + /// For instance, if the A extension (`"a"`) is enabled, its subsets (the + /// Zalrsc and Zaamo extensions; `"zalrsc"` and `"zaamo"`) are also enabled. + /// Likewise, if the F extension (`"f"`) is enabled, one of its dependencies + /// (the Zicsr extension `"zicsr"`) is also enabled. 
+ /// + /// # Unprivileged Specification + /// + /// The supported ratified RISC-V instruction sets are as follows: + /// + /// * RV32E: `"rv32e"` + /// * RV32I: `"rv32i"` + /// * RV64I: `"rv64i"` + /// * A: `"a"` + /// * Zaamo: `"zaamo"` + /// * Zalrsc: `"zalrsc"` + /// * B: `"b"` + /// * Zba: `"zba"` + /// * Zbb: `"zbb"` + /// * Zbs: `"zbs"` + /// * C: `"c"` + /// * Zca: `"zca"` + /// * Zcd: `"zcd"` (if D is enabled) + /// * Zcf: `"zcf"` (if F is enabled on RV32) + /// * D: `"d"` + /// * F: `"f"` + /// * M: `"m"` + /// * Q: `"q"` + /// * V: `"v"` + /// * Zve32x: `"zve32x"` + /// * Zve32f: `"zve32f"` + /// * Zve64x: `"zve64x"` + /// * Zve64f: `"zve64f"` + /// * Zve64d: `"zve64d"` + /// * Zicbom: `"zicbom"` + /// * Zicboz: `"zicboz"` + /// * Zicntr: `"zicntr"` + /// * Zicond: `"zicond"` + /// * Zicsr: `"zicsr"` + /// * Zifencei: `"zifencei"` + /// * Zihintntl: `"zihintntl"` + /// * Zihintpause: `"zihintpause"` + /// * Zihpm: `"zihpm"` + /// * Zimop: `"zimop"` + /// * Zacas: `"zacas"` + /// * Zawrs: `"zawrs"` + /// * Zfa: `"zfa"` + /// * Zfbfmin: `"zfbfmin"` + /// * Zfh: `"zfh"` + /// * Zfhmin: `"zfhmin"` + /// * Zfinx: `"zfinx"` + /// * Zdinx: `"zdinx"` + /// * Zhinx: `"zhinx"` + /// * Zhinxmin: `"zhinxmin"` + /// * Zcb: `"zcb"` + /// * Zcmop: `"zcmop"` + /// * Zbc: `"zbc"` + /// * Zbkb: `"zbkb"` + /// * Zbkc: `"zbkc"` + /// * Zbkx: `"zbkx"` + /// * Zk: `"zk"` + /// * Zkn: `"zkn"` + /// * Zknd: `"zknd"` + /// * Zkne: `"zkne"` + /// * Zknh: `"zknh"` + /// * Zkr: `"zkr"` + /// * Zks: `"zks"` + /// * Zksed: `"zksed"` + /// * Zksh: `"zksh"` + /// * Zkt: `"zkt"` + /// * Zvbb: `"zvbb"` + /// * Zvbc: `"zvbc"` + /// * Zvfbfmin: `"zvfbfmin"` + /// * Zvfbfwma: `"zvfbfwma"` + /// * Zvfh: `"zvfh"` + /// * Zvfhmin: `"zvfhmin"` + /// * Zvkb: `"zvkb"` + /// * Zvkg: `"zvkg"` + /// * Zvkn: `"zvkn"` + /// * Zvkned: `"zvkned"` + /// * Zvknha: `"zvknha"` + /// * Zvknhb: `"zvknhb"` + /// * Zvknc: `"zvknc"` + /// * Zvkng: `"zvkng"` + /// * Zvks: `"zvks"` + /// * Zvksed: `"zvksed"` + /// 
* Zvksh: `"zvksh"` + /// * Zvksc: `"zvksc"` + /// * Zvksg: `"zvksg"` + /// * Zvkt: `"zvkt"` + /// * Ztso: `"ztso"` + /// + /// There are also bases and extensions marked as standard instruction sets, + /// but they are in a frozen or draft state. These instruction sets are also + /// reserved by this macro and can be detected on future platforms. + /// + /// Draft RISC-V instruction sets: + /// + /// * RV128I: `"rv128i"` + /// * J: `"j"` + /// * P: `"p"` + /// * Zam: `"zam"` + /// + /// # Performance Hints + /// + /// The two features below define performance hints for unaligned + /// scalar/vector memory accesses, respectively. If enabled, it denotes that + /// the corresponding unaligned memory access is reasonably fast. + /// + /// * `"unaligned-scalar-mem"` + /// * Runtime detection requires Linux kernel version 6.4 or later. + /// * `"unaligned-vector-mem"` + /// * Runtime detection requires Linux kernel version 6.13 or later. + #[stable(feature = "riscv_ratified", since = "1.78.0")] + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv32i: "rv32i"; + without cfg check: true; + /// RV32I Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv32e: "rv32e"; + without cfg check: true; + /// RV32E Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv64i: "rv64i"; + without cfg check: true; + /// RV64I Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] rv128i: "rv128i"; + without cfg check: true; + /// RV128I Base Integer Instruction Set + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] unaligned_scalar_mem: "unaligned-scalar-mem"; + /// Has reasonably performant unaligned scalar + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] unaligned_vector_mem:
"unaligned-vector-mem"; + /// Has reasonably performant unaligned vector + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicsr: "zicsr"; + /// "Zicsr" Extension for Control and Status Register (CSR) Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicntr: "zicntr"; + /// "Zicntr" Extension for Base Counters and Timers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zihpm: "zihpm"; + /// "Zihpm" Extension for Hardware Performance Counters + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zifencei: "zifencei"; + /// "Zifencei" Extension for Instruction-Fetch Fence + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zihintntl: "zihintntl"; + /// "Zihintntl" Extension for Non-Temporal Locality Hints + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zihintpause: "zihintpause"; + /// "Zihintpause" Extension for Pause Hint + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zimop: "zimop"; + /// "Zimop" Extension for May-Be-Operations + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicbom: "zicbom"; + /// "Zicbom" Extension for Cache-Block Management Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicboz: "zicboz"; + /// "Zicboz" Extension for Cache-Block Zero Instruction + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zicond: "zicond"; + /// "Zicond" Extension for Integer Conditional Operations + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] m: "m"; + /// "M" Extension for Integer Multiplication and Division + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] a: "a"; + /// "A" Extension for Atomic Instructions + @FEATURE: 
#[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zalrsc: "zalrsc"; + /// "Zalrsc" Extension for Load-Reserved/Store-Conditional Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zaamo: "zaamo"; + /// "Zaamo" Extension for Atomic Memory Operations + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zawrs: "zawrs"; + /// "Zawrs" Extension for Wait-on-Reservation-Set Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zacas: "zacas"; + /// "Zacas" Extension for Atomic Compare-and-Swap (CAS) Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zam: "zam"; + without cfg check: true; + /// "Zam" Extension for Misaligned Atomics + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] ztso: "ztso"; + /// "Ztso" Extension for Total Store Ordering + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] f: "f"; + /// "F" Extension for Single-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] d: "d"; + /// "D" Extension for Double-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] q: "q"; + without cfg check: true; + /// "Q" Extension for Quad-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfh: "zfh"; + /// "Zfh" Extension for Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfhmin: "zfhmin"; + /// "Zfhmin" Extension for Minimal Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfa: "zfa"; + /// "Zfa" Extension for Additional Floating-Point Instructions + @FEATURE: #[unstable(feature = 
"stdarch_riscv_feature_detection", issue = "111192")] zfbfmin: "zfbfmin"; + /// "Zfbfmin" Extension for Scalar BF16 Converts + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zfinx: "zfinx"; + /// "Zfinx" Extension for Single-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zdinx: "zdinx"; + /// "Zdinx" Extension for Double-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zhinx: "zhinx"; + /// "Zhinx" Extension for Half-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zhinxmin: "zhinxmin"; + /// "Zhinxmin" Extension for Minimal Half-Precision Floating-Point in Integer Registers + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] c: "c"; + /// "C" Extension for Compressed Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zca: "zca"; + /// "Zca" Compressed Instructions excluding Floating-Point Loads/Stores + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcf: "zcf"; + without cfg check: true; + /// "Zcf" Compressed Instructions for Single-Precision Floating-Point Loads/Stores on RV32 + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcd: "zcd"; + without cfg check: true; + /// "Zcd" Compressed Instructions for Double-Precision Floating-Point Loads/Stores + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcb: "zcb"; + /// "Zcb" Simple Code-size Saving Compressed Instructions + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zcmop: "zcmop"; + /// "Zcmop" Extension for Compressed May-Be-Operations + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue 
= "111192")] b: "b"; + /// "B" Extension for Bit Manipulation + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zba: "zba"; + /// "Zba" Extension for Address Generation + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbb: "zbb"; + /// "Zbb" Extension for Basic Bit-Manipulation + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbc: "zbc"; + /// "Zbc" Extension for Carry-less Multiplication + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbs: "zbs"; + /// "Zbs" Extension for Single-Bit Instructions + + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbkb: "zbkb"; + /// "Zbkb" Extension for Bit-Manipulation for Cryptography + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbkc: "zbkc"; + /// "Zbkc" Extension for Carry-less Multiplication for Cryptography + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zbkx: "zbkx"; + /// "Zbkx" Extension for Crossbar Permutations + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zknd: "zknd"; + /// "Zknd" Cryptography Extension for NIST Suite: AES Decryption + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkne: "zkne"; + /// "Zkne" Cryptography Extension for NIST Suite: AES Encryption + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zknh: "zknh"; + /// "Zknh" Cryptography Extension for NIST Suite: Hash Function Instructions + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zksed: "zksed"; + /// "Zksed" Cryptography Extension for ShangMi Suite: SM4 Block Cipher Instructions + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zksh: "zksh"; + /// "Zksh" Cryptography Extension for ShangMi Suite: SM3 Hash Function Instructions + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkr: "zkr"; + /// "Zkr" Entropy Source Extension + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkn: "zkn"; 
+ /// "Zkn" Cryptography Extension for NIST Algorithm Suite + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zks: "zks"; + /// "Zks" Cryptography Extension for ShangMi Algorithm Suite + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zk: "zk"; + /// "Zk" Cryptography Extension for Standard Scalar Cryptography + @FEATURE: #[stable(feature = "riscv_ratified", since = "1.78.0")] zkt: "zkt"; + /// "Zkt" Cryptography Extension for Data Independent Execution Latency + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] v: "v"; + /// "V" Extension for Vector Operations + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve32x: "zve32x"; + /// "Zve32x" Vector Extension for Embedded Processors (32-bit+; Integer) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve32f: "zve32f"; + /// "Zve32f" Vector Extension for Embedded Processors (32-bit+; with Single-Precision Floating-Point) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve64x: "zve64x"; + /// "Zve64x" Vector Extension for Embedded Processors (64-bit+; Integer) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve64f: "zve64f"; + /// "Zve64f" Vector Extension for Embedded Processors (64-bit+; with Single-Precision Floating-Point) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zve64d: "zve64d"; + /// "Zve64d" Vector Extension for Embedded Processors (64-bit+; with Double-Precision Floating-Point) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfh: "zvfh"; + /// "Zvfh" Vector Extension for Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfhmin: "zvfhmin"; + /// "Zvfhmin" Vector Extension for Minimal Half-Precision Floating-Point + @FEATURE: 
#[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfbfmin: "zvfbfmin"; + /// "Zvfbfmin" Vector Extension for BF16 Converts + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvfbfwma: "zvfbfwma"; + /// "Zvfbfwma" Vector Extension for BF16 Widening Multiply-Add + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvbb: "zvbb"; + /// "Zvbb" Extension for Vector Basic Bit-Manipulation + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvbc: "zvbc"; + /// "Zvbc" Extension for Vector Carryless Multiplication + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkb: "zvkb"; + /// "Zvkb" Extension for Vector Cryptography Bit-Manipulation + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkg: "zvkg"; + /// "Zvkg" Cryptography Extension for Vector GCM/GMAC + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkned: "zvkned"; + /// "Zvkned" Cryptography Extension for NIST Suite: Vector AES Block Cipher + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvknha: "zvknha"; + /// "Zvknha" Cryptography Extension for Vector SHA-2 Secure Hash (SHA-256) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvknhb: "zvknhb"; + /// "Zvknhb" Cryptography Extension for Vector SHA-2 Secure Hash (SHA-256/512) + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksed: "zvksed"; + /// "Zvksed" Cryptography Extension for ShangMi Suite: Vector SM4 Block Cipher + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksh: "zvksh"; + /// "Zvksh" Cryptography Extension for ShangMi Suite: Vector SM3 Secure Hash + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkn: "zvkn"; + /// "Zvkn" 
Cryptography Extension for NIST Algorithm Suite + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvknc: "zvknc"; + /// "Zvknc" Cryptography Extension for NIST Algorithm Suite with Carryless Multiply + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkng: "zvkng"; + /// "Zvkng" Cryptography Extension for NIST Algorithm Suite with GCM + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvks: "zvks"; + /// "Zvks" Cryptography Extension for ShangMi Algorithm Suite + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksc: "zvksc"; + /// "Zvksc" Cryptography Extension for ShangMi Algorithm Suite with Carryless Multiply + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvksg: "zvksg"; + /// "Zvksg" Cryptography Extension for ShangMi Algorithm Suite with GCM + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] zvkt: "zvkt"; + /// "Zvkt" Extension for Vector Data-Independent Execution Latency + + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] j: "j"; + without cfg check: true; + /// "J" Extension for Dynamically Translated Languages + @FEATURE: #[unstable(feature = "stdarch_riscv_feature_detection", issue = "111192")] p: "p"; + without cfg check: true; + /// "P" Extension for Packed-SIMD Instructions +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/s390x.rs b/library/stdarch/crates/std_detect/src/detect/arch/s390x.rs new file mode 100644 index 00000000000..4c20d011680 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/s390x.rs @@ -0,0 +1,81 @@ +//! Run-time feature detection on s390x. + +features! { + @TARGET: s390x; + @CFG: target_arch = "s390x"; + @MACRO_NAME: is_s390x_feature_detected; + @MACRO_ATTRS: + /// Checks if `s390x` feature is enabled. 
+ #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] concurrent_functions: "concurrent-functions"; + /// s390x concurrent-functions facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] deflate_conversion: "deflate-conversion"; + /// s390x deflate-conversion facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] enhanced_sort: "enhanced-sort"; + /// s390x enhanced-sort facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] guarded_storage: "guarded-storage"; + /// s390x guarded-storage facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] high_word: "high-word"; + /// s390x high-word facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension3: "message-security-assist-extension3"; + /// s390x message-security-assist-extension3 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension4: "message-security-assist-extension4"; + /// s390x message-security-assist-extension4 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension5: 
"message-security-assist-extension5"; + /// s390x message-security-assist-extension5 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension8: "message-security-assist-extension8"; + /// s390x message-security-assist-extension8 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension9: "message-security-assist-extension9"; + /// s390x message-security-assist-extension9 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] message_security_assist_extension12: "message-security-assist-extension12"; + /// s390x message-security-assist-extension12 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_2: "miscellaneous-extensions-2"; + /// s390x miscellaneous-extensions-2 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_3: "miscellaneous-extensions-3"; + /// s390x miscellaneous-extensions-3 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] miscellaneous_extensions_4: "miscellaneous-extensions-4"; + /// s390x miscellaneous-extensions-4 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] nnp_assist: "nnp-assist"; + /// s390x 
nnp-assist facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] transactional_execution: "transactional-execution"; + /// s390x transactional-execution facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector: "vector"; + /// s390x vector facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_1: "vector-enhancements-1"; + /// s390x vector-enhancements-1 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_2: "vector-enhancements-2"; + /// s390x vector-enhancements-2 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_enhancements_3: "vector-enhancements-3"; + /// s390x vector-enhancements-3 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal: "vector-packed-decimal"; + /// s390x vector-packed-decimal facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement: "vector-packed-decimal-enhancement"; + /// s390x vector-packed-decimal-enhancement facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] 
vector_packed_decimal_enhancement_2: "vector-packed-decimal-enhancement-2"; + /// s390x vector-packed-decimal-enhancement-2 facility + #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] + @FEATURE: #[unstable(feature = "stdarch_s390x_feature_detection", issue = "135413")] vector_packed_decimal_enhancement_3: "vector-packed-decimal-enhancement-3"; + /// s390x vector-packed-decimal-enhancement-3 facility +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs new file mode 100644 index 00000000000..f23cfc33417 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -0,0 +1,278 @@ +//! This module implements minimal run-time feature detection for x86. +//! +//! The features are detected using the `detect_features` function below. +//! This function uses the CPUID instruction to read the feature flags from the +//! CPU and encodes them in a `usize` where each bit position represents +//! whether a feature is available (bit is set) or unavailable (bit is cleared). +//! +//! The enum `Feature` is used to map bit positions to feature names, and +//! the `__crate::detect::check_for!` macro is used to map string literals (e.g., +//! "avx") to these bit positions (e.g., `Feature::avx`). +//! +//! The run-time feature detection is performed by the +//! `__crate::detect::check_for(Feature) -> bool` function. On its first call, +//! this function queries the CPU for the available features and stores them +//! in a global `AtomicUsize` variable. The query is performed by just checking +//! whether the feature bit in this global variable is set or cleared. + +features! { + @TARGET: x86; + @CFG: any(target_arch = "x86", target_arch = "x86_64"); + @MACRO_NAME: is_x86_feature_detected; + @MACRO_ATTRS: + /// A macro to test at *runtime* whether a CPU feature is available on + /// x86/x86-64 platforms.
+ /// + /// This macro is provided in the standard library and will detect at runtime + /// whether the specified CPU feature is detected. This does **not** resolve at + /// compile time unless the specified feature is already enabled for the entire + /// crate. Runtime detection currently relies mostly on the `cpuid` instruction. + /// + /// This macro only takes one argument which is a string literal of the feature + /// being tested for. The feature names supported are the lowercase versions of + /// the ones defined by Intel in [their documentation][docs]. + /// + /// ## Supported arguments + /// + /// This macro supports the same names that `#[target_feature]` supports. Unlike + /// `#[target_feature]`, however, this macro does not support names separated + /// with a comma. Instead testing for multiple features must be done through + /// separate macro invocations for now. + /// + /// Supported arguments are: + /// + /// * `"aes"` + /// * `"pclmulqdq"` + /// * `"rdrand"` + /// * `"rdseed"` + /// * `"tsc"` + /// * `"mmx"` + /// * `"sse"` + /// * `"sse2"` + /// * `"sse3"` + /// * `"ssse3"` + /// * `"sse4.1"` + /// * `"sse4.2"` + /// * `"sse4a"` + /// * `"sha"` + /// * `"avx"` + /// * `"avx2"` + /// * `"sha512"` + /// * `"sm3"` + /// * `"sm4"` + /// * `"avx512f"` + /// * `"avx512cd"` + /// * `"avx512er"` + /// * `"avx512pf"` + /// * `"avx512bw"` + /// * `"avx512dq"` + /// * `"avx512vl"` + /// * `"avx512ifma"` + /// * `"avx512vbmi"` + /// * `"avx512vpopcntdq"` + /// * `"avx512vbmi2"` + /// * `"gfni"` + /// * `"vaes"` + /// * `"vpclmulqdq"` + /// * `"avx512vnni"` + /// * `"avx512bitalg"` + /// * `"avx512bf16"` + /// * `"avx512vp2intersect"` + /// * `"avx512fp16"` + /// * `"avxvnni"` + /// * `"avxifma"` + /// * `"avxneconvert"` + /// * `"avxvnniint8"` + /// * `"avxvnniint16"` + /// * `"amx-tile"` + /// * `"amx-int8"` + /// * `"amx-bf16"` + /// * `"amx-fp16"` + /// * `"amx-complex"` + /// * `"amx-avx512"` + /// * `"amx-fp8"` + /// * `"amx-movrs"` + /// * 
`"amx-tf32"` + /// * `"amx-transpose"` + /// * `"f16c"` + /// * `"fma"` + /// * `"bmi1"` + /// * `"bmi2"` + /// * `"abm"` + /// * `"lzcnt"` + /// * `"tbm"` + /// * `"popcnt"` + /// * `"fxsr"` + /// * `"xsave"` + /// * `"xsaveopt"` + /// * `"xsaves"` + /// * `"xsavec"` + /// * `"cmpxchg16b"` + /// * `"kl"` + /// * `"widekl"` + /// * `"adx"` + /// * `"rtm"` + /// * `"movbe"` + /// * `"ermsb"` + /// * `"movrs"` + /// * `"xop"` + /// + /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide + #[stable(feature = "simd_x86", since = "1.27.0")] + @BIND_FEATURE_NAME: "abm"; "lzcnt"; // abm is a synonym for lzcnt + @BIND_FEATURE_NAME: "avx512gfni"; "gfni"; #[deprecated(since = "1.67.0", note = "the `avx512gfni` feature has been renamed to `gfni`")]; + @BIND_FEATURE_NAME: "avx512vaes"; "vaes"; #[deprecated(since = "1.67.0", note = "the `avx512vaes` feature has been renamed to `vaes`")]; + @BIND_FEATURE_NAME: "avx512vpclmulqdq"; "vpclmulqdq"; #[deprecated(since = "1.67.0", note = "the `avx512vpclmulqdq` feature has been renamed to `vpclmulqdq`")]; + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] aes: "aes"; + /// AES (Advanced Encryption Standard New Instructions AES-NI) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] pclmulqdq: "pclmulqdq"; + /// CLMUL (Carry-less Multiplication) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rdrand: "rdrand"; + /// RDRAND + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rdseed: "rdseed"; + /// RDSEED + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] tsc: "tsc"; + without cfg check: true; + /// TSC (Time Stamp Counter) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] mmx: "mmx"; + without cfg check: true; + /// MMX (MultiMedia eXtensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse: "sse"; + /// SSE (Streaming SIMD Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse2: "sse2"; + /// SSE2 (Streaming 
SIMD Extensions 2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse3: "sse3"; + /// SSE3 (Streaming SIMD Extensions 3) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ssse3: "ssse3"; + /// SSSE3 (Supplemental Streaming SIMD Extensions 3) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4_1: "sse4.1"; + /// SSE4.1 (Streaming SIMD Extensions 4.1) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4_2: "sse4.2"; + /// SSE4.2 (Streaming SIMD Extensions 4.2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4a: "sse4a"; + /// SSE4a (Streaming SIMD Extensions 4a) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sha: "sha"; + /// SHA + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx: "avx"; + /// AVX (Advanced Vector Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx2: "avx2"; + /// AVX2 (Advanced Vector Extensions 2) + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sha512: "sha512"; + /// SHA512 + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sm3: "sm3"; + /// SM3 + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sm4: "sm4"; + /// SM4 + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512f: "avx512f" ; + /// AVX-512 F (Foundation) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512cd: "avx512cd" ; + /// AVX-512 CD (Conflict Detection Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512er: "avx512er"; + without cfg check: true; + /// AVX-512 ER (Exponential and Reciprocal Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512pf: "avx512pf"; + without cfg check: true; + /// AVX-512 PF (Prefetch Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bw: "avx512bw"; + /// AVX-512 BW (Byte and Word Instructions) + @FEATURE:
#[stable(feature = "simd_x86", since = "1.27.0")] avx512dq: "avx512dq"; + /// AVX-512 DQ (Doubleword and Quadword) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vl: "avx512vl"; + /// AVX-512 VL (Vector Length Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512ifma: "avx512ifma"; + /// AVX-512 IFMA (Integer Fused Multiply Add) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi: "avx512vbmi"; + /// AVX-512 VBMI (Vector Byte Manipulation Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpopcntdq: "avx512vpopcntdq"; + /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and Quadword) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi2: "avx512vbmi2"; + /// AVX-512 VBMI2 (Additional byte, word, dword and qword capabilities) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] gfni: "gfni"; + /// AVX-512 GFNI (Galois Field New Instruction) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] vaes: "vaes"; + /// AVX-512 VAES (Vector AES instruction) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] vpclmulqdq: "vpclmulqdq"; + /// AVX-512 VPCLMULQDQ (Vector PCLMULQDQ instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vnni: "avx512vnni"; + /// AVX-512 VNNI (Vector Neural Network Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bitalg: "avx512bitalg"; + /// AVX-512 BITALG (Support for VPOPCNT\[B,W\] and VPSHUFBITQMB) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bf16: "avx512bf16"; + /// AVX-512 BF16 (BFLOAT16 instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vp2intersect: "avx512vp2intersect"; + /// AVX-512 VP2INTERSECT + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512fp16: "avx512fp16"; + /// AVX-512 FP16 (FLOAT16 instructions) + @FEATURE: #[stable(feature = 
"avx512_target_feature", since = "1.89.0")] avxifma: "avxifma"; + /// AVX-IFMA (Integer Fused Multiply Add) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxneconvert: "avxneconvert"; + /// AVX-NE-CONVERT (Exceptionless Convert) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxvnni: "avxvnni"; + /// AVX-VNNI (Vector Neural Network Instructions) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxvnniint16: "avxvnniint16"; + /// AVX-VNNI_INT16 (VNNI with 16-bit Integers) + @FEATURE: #[stable(feature = "avx512_target_feature", since = "1.89.0")] avxvnniint8: "avxvnniint8"; + /// AVX-VNNI_INT8 (VNNI with 8-bit integers) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tile: "amx-tile"; + /// AMX (Advanced Matrix Extensions) - Tile load/store + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_int8: "amx-int8"; + /// AMX-INT8 (Operations on 8-bit integers) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_bf16: "amx-bf16"; + /// AMX-BF16 (BFloat16 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp16: "amx-fp16"; + /// AMX-FP16 (Float16 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_complex: "amx-complex"; + /// AMX-COMPLEX (Complex number Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_avx512: "amx-avx512"; + /// AMX-AVX512 (AVX512 operations extended to matrices) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp8: "amx-fp8"; + /// AMX-FP8 (Float8 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_movrs: "amx-movrs"; + /// AMX-MOVRS (Matrix MOVRS operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tf32: "amx-tf32"; + /// AMX-TF32 (TensorFloat32 Operations) + @FEATURE:
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose"; + /// AMX-TRANSPOSE (Matrix Transpose Operations) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c"; + /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma"; + /// FMA (Fused Multiply Add) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] bmi1: "bmi1" ; + /// BMI1 (Bit Manipulation Instructions 1) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] bmi2: "bmi2" ; + /// BMI2 (Bit Manipulation Instructions 2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] lzcnt: "lzcnt"; + /// ABM (Advanced Bit Manipulation) / LZCNT (Leading Zero Count) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] tbm: "tbm"; + /// TBM (Trailing Bit Manipulation) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] popcnt: "popcnt"; + /// POPCNT (Population Count) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fxsr: "fxsr"; + /// FXSR (Floating-point context fast save and restore) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsave: "xsave"; + /// XSAVE (Save Processor Extended States) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsaveopt: "xsaveopt"; + /// XSAVEOPT (Save Processor Extended States Optimized) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsaves: "xsaves"; + /// XSAVES (Save Processor Extended States Supervisor) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsavec: "xsavec"; + /// XSAVEC (Save Processor Extended States Compacted) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] cmpxchg16b: "cmpxchg16b"; + /// CMPXCH16B (16-byte compare-and-swap instruction) + @FEATURE: #[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] kl: "kl"; + /// Intel Key Locker + @FEATURE: #[stable(feature = "keylocker_x86", since = 
/// Reports whether bit number `bit` (counting from the least significant
/// bit) is set in `x`.
///
/// In debug builds an index that does not fit in a `usize` trips the
/// assertion below.
#[allow(dead_code)]
#[inline]
pub(crate) fn test(x: usize, bit: u32) -> bool {
    debug_assert!(bit < usize::BITS, "bit index out-of-bounds");
    // Move the requested bit down to position 0 and inspect it there.
    (x >> bit) & 1 == 1
}
/// Maximum number of features that can be cached.
const CACHE_CAPACITY: u32 = 93;

/// Bitset of detected features, built up before it is written to the cache.
// The derived `Default` starts with every bit cleared, i.e. with no feature
// marked as detected.
#[derive(Copy, Clone, Default, PartialEq, Eq)]
pub(crate) struct Initializer(u128);

// NOTE: in debug builds the assertions below fire as soon as a feature index
// no longer fits into `CACHE_CAPACITY` bits.
impl Initializer {
    /// Returns `true` if `bit` is set in the bitset.
    #[inline]
    pub(crate) fn test(self, bit: u32) -> bool {
        debug_assert!(
            bit < CACHE_CAPACITY,
            "too many features, time to increase the cache size!"
        );
        self.0 & (1 << bit) != 0
    }

    /// Marks `bit` as set.
    #[inline]
    pub(crate) fn set(&mut self, bit: u32) {
        debug_assert!(
            bit < CACHE_CAPACITY,
            "too many features, time to increase the cache size!"
        );
        self.0 |= 1 << bit;
    }

    /// Clears `bit`.
    #[inline]
    pub(crate) fn unset(&mut self, bit: u32) {
        debug_assert!(
            bit < CACHE_CAPACITY,
            "too many features, time to increase the cache size!"
        );
        self.0 &= !(1 << bit);
    }
}
/// One slot of the feature cache, holding `usize::BITS - 1` feature bits.
///
/// A stored value of 0 means "not initialized yet"; `initialize` always sets
/// the most significant bit, so an initialized slot is never 0.
///
/// All accesses use `Relaxed` ordering: only the modification order of this
/// single atomic matters, no happens-before relation with other memory is
/// needed.
struct Cache(AtomicUsize);

impl Cache {
    /// Number of feature bits a slot can hold (the top bit is reserved).
    const CAPACITY: u32 = usize::BITS - 1;
    /// Selects the feature bits of a slot.
    const MASK: usize = (1 << Self::CAPACITY) - 1;
    /// Reserved top bit marking the slot as initialized.
    const INITIALIZED_BIT: usize = 1usize << Self::CAPACITY;

    /// Creates an uninitialized cache.
    #[allow(clippy::declare_interior_mutable_const)]
    const fn uninitialized() -> Self {
        Self(AtomicUsize::new(0))
    }

    /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized.
    #[inline]
    pub(crate) fn test(&self, bit: u32) -> Option<bool> {
        match self.0.load(Ordering::Relaxed) {
            0 => None,
            // Widen first so that indices beyond the width of `usize` read as unset.
            cached => Some(((cached as u128) >> bit) & 1 == 1),
        }
    }

    /// Stores `value` together with the initialized marker and returns `value`.
    ///
    /// `value` must only use the bits covered by `MASK` (checked in debug builds).
    #[inline]
    fn initialize(&self, value: usize) -> usize {
        debug_assert_eq!(value & !Self::MASK, 0);
        let stored = value | Self::INITIALIZED_BIT;
        self.0.store(stored, Ordering::Relaxed);
        value
    }
}
+ let mut env = vec![0; len as usize + 1]; + let len = unsafe { GetEnvironmentVariableA(RUST_STD_DETECT_UNSTABLE.as_ptr().cast::<u8>(), env.as_mut_ptr(), len + 1) }; + if len > 0 { + disable_features(&env[..len as usize], &mut value); + } + } + } else { + let env = unsafe { + libc::getenv(RUST_STD_DETECT_UNSTABLE.as_ptr()) + }; + if !env.is_null() { + let len = unsafe { libc::strlen(env) }; + let env = unsafe { core::slice::from_raw_parts(env as *const u8, len) }; + disable_features(env, &mut value); + } + } + } + do_initialize(value); + value + } + } else { + #[inline] + fn initialize(value: Initializer) -> Initializer { + do_initialize(value); + value + } + } +} + +#[inline] +fn do_initialize(value: Initializer) { + CACHE[0].initialize((value.0) as usize & Cache::MASK); + CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK); + CACHE[2].initialize((value.0 >> (2 * Cache::CAPACITY)) as usize & Cache::MASK); +} + +// We only have to detect features once, and it's fairly costly, so hint to LLVM +// that it should assume that cache hits are more common than misses (which is +// the point of caching). It's possibly unfortunate that this function needs to +// reach across modules like this to call `os::detect_features`, but it produces +// the best code out of several attempted variants. +// +// The `Initializer` that the cache was initialized with is returned, so that +// the caller can call `test()` on it without having to load the value from the +// cache again. +#[cold] +fn detect_and_initialize() -> Initializer { + initialize(super::os::detect_features()) +} + +/// Tests the `bit` of the storage. If the storage has not been initialized, +/// initializes it with the result of `os::detect_features()`. +/// +/// On its first invocation, it detects the CPU features and caches them in the +/// `CACHE` global variable as an `AtomicU64`. +/// +/// It uses the `Feature` variant to index into this variable as a bitset. 
If +/// the bit is set, the feature is enabled, and otherwise it is disabled. +/// +/// If the feature `std_detect_env_override` is enabled looks for the env +/// variable `RUST_STD_DETECT_UNSTABLE` and uses its content to disable +/// Features that would had been otherwise detected. +#[inline] +pub(crate) fn test(bit: u32) -> bool { + let (relative_bit, idx) = if bit < Cache::CAPACITY { + (bit, 0) + } else if bit < 2 * Cache::CAPACITY { + (bit - Cache::CAPACITY, 1) + } else { + (bit - 2 * Cache::CAPACITY, 2) + }; + CACHE[idx] + .test(relative_bit) + .unwrap_or_else(|| detect_and_initialize().test(bit)) +} diff --git a/library/stdarch/crates/std_detect/src/detect/macros.rs b/library/stdarch/crates/std_detect/src/detect/macros.rs new file mode 100644 index 00000000000..a2994fb7daa --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/macros.rs @@ -0,0 +1,204 @@ +#[macro_export] +#[allow_internal_unstable(stdarch_internal)] +#[unstable(feature = "stdarch_internal", issue = "none")] +macro_rules! detect_feature { + ($feature:tt, $feature_lit:tt) => { + $crate::detect_feature!($feature, $feature_lit : $feature_lit) + }; + ($feature:tt, $feature_lit:tt : $($target_feature_lit:tt),*) => { + $(cfg!(target_feature = $target_feature_lit) ||)* + $crate::detect::__is_feature_detected::$feature() + }; + ($feature:tt, $feature_lit:tt, without cfg check: true) => { + $crate::detect::__is_feature_detected::$feature() + }; +} + +#[allow(unused_macros, reason = "it's used in the features! macro below")] +macro_rules! 
check_cfg_feature { + ($feature:tt, $feature_lit:tt) => { + check_cfg_feature!($feature, $feature_lit : $feature_lit) + }; + ($feature:tt, $feature_lit:tt : $($target_feature_lit:tt),*) => { + $(cfg!(target_feature = $target_feature_lit);)* + }; + ($feature:tt, $feature_lit:tt, without cfg check: $feature_cfg_check:literal) => { + #[allow(unexpected_cfgs, reason = $feature_lit)] + { cfg!(target_feature = $feature_lit) } + }; +} + +#[allow(unused)] +macro_rules! features { + ( + @TARGET: $target:ident; + @CFG: $cfg:meta; + @MACRO_NAME: $macro_name:ident; + @MACRO_ATTRS: $(#[$macro_attrs:meta])* + $(@BIND_FEATURE_NAME: $bind_feature:tt; $feature_impl:tt; $(#[$deprecate_attr:meta];)?)* + $(@NO_RUNTIME_DETECTION: $nort_feature:tt; )* + $(@FEATURE: #[$stability_attr:meta] $feature:ident: $feature_lit:tt; + $(without cfg check: $feature_cfg_check:tt;)? + $(implied by target_features: [$($target_feature_lit:tt),*];)? + $(#[$feature_comment:meta])*)* + ) => { + #[macro_export] + $(#[$macro_attrs])* + #[allow_internal_unstable(stdarch_internal)] + #[cfg($cfg)] + #[doc(cfg($cfg))] + macro_rules! $macro_name { + $( + ($feature_lit) => { + $crate::detect_feature!($feature, $feature_lit $(, without cfg check: $feature_cfg_check)? $(: $($target_feature_lit),*)?) + }; + )* + $( + ($bind_feature) => { + { + $( + #[$deprecate_attr] macro_rules! deprecated_feature { {} => {}; } + deprecated_feature! {}; + )? + $crate::$macro_name!($feature_impl) + } + }; + )* + $( + ($nort_feature) => { + compile_error!( + concat!( + stringify!($nort_feature), + " feature cannot be detected at run-time" + ) + ) + }; + )* + ($t:tt,) => { + $crate::$macro_name!($t); + }; + ($t:tt) => { + compile_error!( + concat!( + concat!("unknown ", stringify!($target)), + concat!(" target feature: ", $t) + ) + ) + }; + } + + $(#[$macro_attrs])* + #[macro_export] + #[cfg(not($cfg))] + #[doc(cfg($cfg))] + macro_rules! 
$macro_name { + $( + ($feature_lit) => { + compile_error!( + concat!( + r#"This macro cannot be used on the current target. + You can prevent it from being used in other architectures by + guarding it behind a cfg("#, + stringify!($cfg), + ")." + ) + ) + }; + )* + $( + ($bind_feature) => { $crate::$macro_name!($feature_impl) }; + )* + $( + ($nort_feature) => { + compile_error!( + concat!( + stringify!($nort_feature), + " feature cannot be detected at run-time" + ) + ) + }; + )* + ($t:tt,) => { + $crate::$macro_name!($t); + }; + ($t:tt) => { + compile_error!( + concat!( + concat!("unknown ", stringify!($target)), + concat!(" target feature: ", $t) + ) + ) + }; + } + + #[test] + #[deny(unexpected_cfgs)] + #[deny(unfulfilled_lint_expectations)] + fn unexpected_cfgs() { + $( + check_cfg_feature!($feature, $feature_lit $(, without cfg check: $feature_cfg_check)? $(: $($target_feature_lit),*)?); + )* + } + + /// Each variant denotes a position in a bitset for a particular feature. + /// + /// PLEASE: do not use this, it is an implementation detail subject + /// to change. + #[doc(hidden)] + #[allow(non_camel_case_types)] + #[derive(Copy, Clone)] + #[repr(u8)] + #[unstable(feature = "stdarch_internal", issue = "none")] + #[cfg($cfg)] + pub(crate) enum Feature { + $( + $(#[$feature_comment])* + $feature, + )* + + // Do not add variants after last: + _last + } + + #[cfg($cfg)] + impl Feature { + pub(crate) fn to_str(self) -> &'static str { + match self { + $(Feature::$feature => $feature_lit,)* + Feature::_last => unreachable!(), + } + } + + #[cfg(feature = "std_detect_env_override")] + pub(crate) fn from_str(s: &str) -> Result<Feature, ()> { + match s { + $($feature_lit => Ok(Feature::$feature),)* + _ => Err(()) + } + } + } + + /// Each function performs run-time feature detection for a single + /// feature. This allow us to use stability attributes on a per feature + /// basis. + /// + /// PLEASE: do not use this, it is an implementation detail subject + /// to change. 
+ #[doc(hidden)] + #[cfg($cfg)] + #[unstable(feature = "stdarch_internal", issue = "none")] + pub mod __is_feature_detected { + $( + + /// PLEASE: do not use this, it is an implementation detail + /// subject to change. + #[inline] + #[doc(hidden)] + #[$stability_attr] + pub fn $feature() -> bool { + $crate::detect::check_for($crate::detect::Feature::$feature) + } + )* + } + }; +} diff --git a/library/stdarch/crates/std_detect/src/detect/mod.rs b/library/stdarch/crates/std_detect/src/detect/mod.rs new file mode 100644 index 00000000000..8fd3d957932 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/mod.rs @@ -0,0 +1,120 @@ +//! This module implements run-time feature detection. +//! +//! The `is_{arch}_feature_detected!("feature-name")` macros take the name of a +//! feature as a string-literal, and return a boolean indicating whether the +//! feature is enabled at run-time or not. +//! +//! These macros do two things: +//! * map the string-literal into an integer stored as a `Feature` enum, +//! * call a `os::check_for(x: Feature)` function that returns `true` if the +//! feature is enabled. +//! +//! The `Feature` enums are also implemented in the `arch/{target_arch}.rs` +//! modules. +//! +//! The `check_for` functions are, in general, Operating System dependent. Most +//! architectures do not allow user-space programs to query the feature bits +//! due to security concerns (x86 is the big exception). These functions are +//! implemented in the `os/{target_os}.rs` modules. + +use cfg_if::cfg_if; + +#[macro_use] +mod macros; + +mod arch; + +// This module needs to be public because the `is_{arch}_feature_detected!` +// macros expand calls to items within it in user crates. +#[doc(hidden)] +#[unstable(feature = "stdarch_internal", issue = "none")] +pub use self::arch::__is_feature_detected; + +pub(crate) use self::arch::Feature; + +mod bit; +mod cache; + +cfg_if! 
{ + if #[cfg(miri)] { + // When running under miri all target-features that are not enabled at + // compile-time are reported as disabled at run-time. + // + // For features for which `cfg(target_feature)` returns true, + // this run-time detection logic is never called. + #[path = "os/other.rs"] + mod os; + } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + // On x86/x86_64 no OS specific functionality is required. + #[path = "os/x86.rs"] + mod os; + } else if #[cfg(all(any(target_os = "linux", target_os = "android"), feature = "libc"))] { + #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] + #[path = "os/riscv.rs"] + mod riscv; + #[path = "os/linux/mod.rs"] + mod os; + } else if #[cfg(all(target_os = "freebsd", feature = "libc"))] { + #[cfg(target_arch = "aarch64")] + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/freebsd/mod.rs"] + mod os; + } else if #[cfg(all(target_os = "openbsd", target_arch = "aarch64", feature = "libc"))] { + #[allow(dead_code)] // we don't use code that calls the mrs instruction. + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/openbsd/aarch64.rs"] + mod os; + } else if #[cfg(all(target_os = "windows", any(target_arch = "aarch64", target_arch = "arm64ec")))] { + #[path = "os/windows/aarch64.rs"] + mod os; + } else if #[cfg(all(target_vendor = "apple", target_arch = "aarch64", feature = "libc"))] { + #[path = "os/darwin/aarch64.rs"] + mod os; + } else { + #[path = "os/other.rs"] + mod os; + } +} + +/// Performs run-time feature detection. +#[inline] +#[allow(dead_code)] +fn check_for(x: Feature) -> bool { + cache::test(x as u32) +} + +/// Returns an `Iterator<Item=(&'static str, bool)>` where +/// `Item.0` is the feature name, and `Item.1` is a `bool` which +/// is `true` if the feature is supported by the host and `false` otherwise. +#[unstable(feature = "stdarch_internal", issue = "none")] +pub fn features() -> impl Iterator<Item = (&'static str, bool)> { + cfg_if! 
{ + if #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "mips", + target_arch = "mips64", + target_arch = "loongarch64", + target_arch = "s390x", + ))] { + (0_u8..Feature::_last as u8).map(|discriminant: u8| { + #[allow(bindings_with_variant_name)] // RISC-V has Feature::f + let f: Feature = unsafe { core::mem::transmute(discriminant) }; + let name: &'static str = f.to_str(); + let enabled: bool = check_for(f); + (name, enabled) + }) + } else { + None.into_iter() + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs new file mode 100644 index 00000000000..1ff2a17e6e1 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs @@ -0,0 +1,130 @@ +//! Run-time feature detection for Aarch64 on any OS that emulates the mrs instruction. +//! +//! On FreeBSD >= 12.0, Linux >= 4.11 and other operating systems, it is possible to use +//! privileged system registers from userspace to check CPU feature support. +//! +//! AArch64 system registers ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1, ID_AA64ISAR1_EL1 +//! have bits dedicated to features like AdvSIMD, CRC32, AES, atomics (LSE), etc. +//! Each part of the register indicates the level of support for a certain feature, e.g. +//! when ID_AA64ISAR0_EL1\[7:4\] is >= 1, AES is supported; when it's >= 2, PMULL is supported. +//! +//! For proper support of [SoCs where different cores have different capabilities](https://medium.com/@jadr2ddude/a-big-little-problem-a-tale-of-big-little-gone-wrong-e7778ce744bb), +//! the OS has to always report only the features supported by all cores, like [FreeBSD does](https://reviews.freebsd.org/D17137#393947). +//! +//! References: +//! +//! 
- [Zircon implementation](https://fuchsia.googlesource.com/zircon/+/master/kernel/arch/arm64/feature.cpp) +//! - [Linux documentation](https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt) +//! - [ARM documentation](https://developer.arm.com/documentation/ddi0601/2022-12/AArch64-Registers?lang=en) + +use crate::detect::{Feature, cache}; +use core::arch::asm; + +/// Try to read the features from the system registers. +/// +/// This will cause SIGILL if the current OS is not trapping the mrs instruction. +pub(crate) fn detect_features() -> cache::Initializer { + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + let aa64isar0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR0_EL1", + out(reg) aa64isar0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + let aa64isar1: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR1_EL1", + out(reg) aa64isar1, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + let aa64mmfr2: u64; + unsafe { + asm!( + "mrs {}, ID_AA64MMFR2_EL1", + out(reg) aa64mmfr2, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + let aa64pfr0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64PFR0_EL1", + out(reg) aa64pfr0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, Some(aa64pfr0)) +} + +pub(crate) fn parse_system_registers( + aa64isar0: u64, + aa64isar1: u64, + aa64mmfr2: u64, + aa64pfr0: Option<u64>, +) -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + enable_feature(Feature::pmull, bits_shift(aa64isar0, 7, 4) >= 2); + enable_feature(Feature::tme, bits_shift(aa64isar0, 27, 24) == 1); + 
enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 2); + enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + if let Some(aa64pfr0) = aa64pfr0 { + let fp = bits_shift(aa64pfr0, 19, 16) < 0xF; + let fphp = bits_shift(aa64pfr0, 19, 16) >= 1; + let asimd = bits_shift(aa64pfr0, 23, 20) < 0xF; + let asimdhp = bits_shift(aa64pfr0, 23, 20) >= 1; + enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fphp); + // SIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + enable_feature(Feature::asimd, fp && asimd && (!fphp | asimdhp)); + // SIMD extensions require SIMD support: + enable_feature(Feature::aes, asimd && bits_shift(aa64isar0, 7, 4) >= 2); + let sha1 = bits_shift(aa64isar0, 11, 8) >= 1; + let sha2 = bits_shift(aa64isar0, 15, 12) >= 1; + enable_feature(Feature::sha2, asimd && sha1 && sha2); + enable_feature(Feature::rdm, asimd && bits_shift(aa64isar0, 31, 28) >= 1); + enable_feature( + Feature::dotprod, + asimd && bits_shift(aa64isar0, 47, 44) >= 1, + ); + enable_feature(Feature::sve, asimd && bits_shift(aa64pfr0, 35, 32) >= 1); + } + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + // Check for either APA or API field + enable_feature(Feature::paca, bits_shift(aa64isar1, 11, 4) >= 1); + enable_feature(Feature::rcpc, bits_shift(aa64isar1, 23, 20) >= 1); + // Check for either GPA or GPI field + enable_feature(Feature::pacg, bits_shift(aa64isar1, 31, 24) >= 1); + + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + enable_feature(Feature::lse2, bits_shift(aa64mmfr2, 35, 32) >= 1); + + value +} + +#[inline] +fn bits_shift(x: u64, high: usize, low: usize) -> u64 { + (x >> low) & ((1 << (high - low + 1)) - 1) +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/darwin/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/darwin/aarch64.rs new file mode 100644 index 
00000000000..44d921689e5 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/darwin/aarch64.rs @@ -0,0 +1,155 @@ +//! Run-time feature detection for aarch64 on Darwin (macOS/iOS/tvOS/watchOS/visionOS). +//! +//! <https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics> + +use crate::detect::{Feature, cache}; +use core::ffi::CStr; + +#[inline] +fn _sysctlbyname(name: &CStr) -> bool { + use libc; + + let mut enabled: i32 = 0; + let mut enabled_len: usize = 4; + let enabled_ptr = &mut enabled as *mut i32 as *mut libc::c_void; + + let ret = unsafe { + libc::sysctlbyname( + name.as_ptr(), + enabled_ptr, + &mut enabled_len, + core::ptr::null_mut(), + 0, + ) + }; + + match ret { + 0 => enabled != 0, + _ => false, + } +} + +/// Try to read the features using sysctlbyname. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Armv8.0 features not using the standard identifiers + let fp = _sysctlbyname(c"hw.optional.floatingpoint"); + let asimd = _sysctlbyname(c"hw.optional.AdvSIMD"); + let crc = _sysctlbyname(c"hw.optional.armv8_crc32"); + + // Armv8 and Armv9 features using the standard identifiers + let aes = _sysctlbyname(c"hw.optional.arm.FEAT_AES"); + let bf16 = _sysctlbyname(c"hw.optional.arm.FEAT_BF16"); + let bti = _sysctlbyname(c"hw.optional.arm.FEAT_BTI"); + let cssc = _sysctlbyname(c"hw.optional.arm.FEAT_CSSC"); + let dit = _sysctlbyname(c"hw.optional.arm.FEAT_DIT"); + let dpb = _sysctlbyname(c"hw.optional.arm.FEAT_DPB"); + let dpb2 = _sysctlbyname(c"hw.optional.arm.FEAT_DPB2"); + let dotprod = _sysctlbyname(c"hw.optional.arm.FEAT_DotProd"); + let ecv = _sysctlbyname(c"hw.optional.arm.FEAT_ECV"); + let fcma = _sysctlbyname(c"hw.optional.arm.FEAT_FCMA"); + let fhm = _sysctlbyname(c"hw.optional.arm.FEAT_FHM"); + let fp16 = 
_sysctlbyname(c"hw.optional.arm.FEAT_FP16"); + let frintts = _sysctlbyname(c"hw.optional.arm.FEAT_FRINTTS"); + let flagm = _sysctlbyname(c"hw.optional.arm.FEAT_FlagM"); + let flagm2 = _sysctlbyname(c"hw.optional.arm.FEAT_FlagM2"); + let hbc = _sysctlbyname(c"hw.optional.arm.FEAT_HBC"); + let i8mm = _sysctlbyname(c"hw.optional.arm.FEAT_I8MM"); + let jsconv = _sysctlbyname(c"hw.optional.arm.FEAT_JSCVT"); + let rcpc = _sysctlbyname(c"hw.optional.arm.FEAT_LRCPC"); + let rcpc2 = _sysctlbyname(c"hw.optional.arm.FEAT_LRCPC2"); + let lse = _sysctlbyname(c"hw.optional.arm.FEAT_LSE"); + let lse2 = _sysctlbyname(c"hw.optional.arm.FEAT_LSE2"); + let pauth = _sysctlbyname(c"hw.optional.arm.FEAT_PAuth"); + let pmull = _sysctlbyname(c"hw.optional.arm.FEAT_PMULL"); + let rdm = _sysctlbyname(c"hw.optional.arm.FEAT_RDM"); + let sb = _sysctlbyname(c"hw.optional.arm.FEAT_SB"); + let sha1 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA1"); + let sha256 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA256"); + let sha3 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA3"); + let sha512 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA512"); + let sme = _sysctlbyname(c"hw.optional.arm.FEAT_SME"); + let sme2 = _sysctlbyname(c"hw.optional.arm.FEAT_SME2"); + let sme_f64f64 = _sysctlbyname(c"hw.optional.arm.FEAT_SME_F64F64"); + let sme_i16i64 = _sysctlbyname(c"hw.optional.arm.FEAT_SME_I16I64"); + let ssbs = _sysctlbyname(c"hw.optional.arm.FEAT_SSBS"); + let wfxt = _sysctlbyname(c"hw.optional.arm.FEAT_WFxT"); + + // The following features are not exposed by `is_aarch64_feature_detected`, + // but *are* reported by `sysctl`. They are here as documentation that they + // exist, and may potentially be exposed later. 
+ /* + let afp = _sysctlbyname(c"hw.optional.arm.FEAT_AFP"); + let csv2 = _sysctlbyname(c"hw.optional.arm.FEAT_CSV2"); + let csv3 = _sysctlbyname(c"hw.optional.arm.FEAT_CSV3"); + let ebf16 = _sysctlbyname(c"hw.optional.arm.FEAT_EBF16"); + let fpac = _sysctlbyname(c"hw.optional.arm.FEAT_FPAC"); + let fpaccombine = _sysctlbyname(c"hw.optional.arm.FEAT_FPACCOMBINE"); + let pacimp = _sysctlbyname(c"hw.optional.arm.FEAT_PACIMP"); + let pauth2 = _sysctlbyname(c"hw.optional.arm.FEAT_PAuth2"); + let rpres = _sysctlbyname(c"hw.optional.arm.FEAT_RPRES"); + let specres = _sysctlbyname(c"hw.optional.arm.FEAT_SPECRES"); + let specres2 = _sysctlbyname(c"hw.optional.arm.FEAT_SPECRES2"); + */ + + // The following "features" are reported by `sysctl` but are mandatory parts + // of SME or SME2, and so are not exposed separately by + // `is_aarch64_feature_detected`. They are here to document their + // existence, in case they're needed in the future. + /* + let sme_b16f32 = _sysctlbyname(c"hw.optional.arm.SME_B16F32"); + let sme_bi32i32 = _sysctlbyname(c"hw.optional.arm.SME_BI32I32"); + let sme_f16f32 = _sysctlbyname(c"hw.optional.arm.SME_F16F32"); + let sme_f32f32 = _sysctlbyname(c"hw.optional.arm.SME_F32F32"); + let sme_i16i32 = _sysctlbyname(c"hw.optional.arm.SME_I16I32"); + let sme_i8i32 = _sysctlbyname(c"hw.optional.arm.SME_I8I32"); + */ + + enable_feature(Feature::aes, aes && pmull); + enable_feature(Feature::asimd, asimd); + enable_feature(Feature::bf16, bf16); + enable_feature(Feature::bti, bti); + enable_feature(Feature::crc, crc); + enable_feature(Feature::cssc, cssc); + enable_feature(Feature::dit, dit); + enable_feature(Feature::dotprod, dotprod); + enable_feature(Feature::dpb, dpb); + enable_feature(Feature::dpb2, dpb2); + enable_feature(Feature::ecv, ecv); + enable_feature(Feature::fcma, fcma); + enable_feature(Feature::fhm, fhm); + enable_feature(Feature::flagm, flagm); + enable_feature(Feature::flagm2, flagm2); + enable_feature(Feature::fp, fp); + 
enable_feature(Feature::fp16, fp16); + enable_feature(Feature::frintts, frintts); + enable_feature(Feature::hbc, hbc); + enable_feature(Feature::i8mm, i8mm); + enable_feature(Feature::jsconv, jsconv); + enable_feature(Feature::lse, lse); + enable_feature(Feature::lse2, lse2); + enable_feature(Feature::paca, pauth); + enable_feature(Feature::pacg, pauth); + enable_feature(Feature::pmull, aes && pmull); + enable_feature(Feature::rcpc, rcpc); + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rdm, rdm); + enable_feature(Feature::sb, sb); + enable_feature(Feature::sha2, sha1 && sha256 && asimd); + enable_feature(Feature::sha3, sha512 && sha3 && asimd); + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme_f64f64, sme_f64f64); + enable_feature(Feature::sme_i16i64, sme_i16i64); + enable_feature(Feature::ssbs, ssbs); + enable_feature(Feature::wfxt, wfxt); + + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs new file mode 100644 index 00000000000..ccc48f53605 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs @@ -0,0 +1,3 @@ +//! Run-time feature detection for Aarch64 on FreeBSD. + +pub(crate) use super::super::aarch64::detect_features; diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs new file mode 100644 index 00000000000..0a15156e1bd --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection for ARM on FreeBSD + +use super::auxvec; +use crate::detect::{Feature, cache}; + +// Defined in machine/elf.h. 
+// https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/arm/include/elf.h +const HWCAP_NEON: usize = 0x00001000; +const HWCAP2_AES: usize = 0x00000001; +const HWCAP2_PMULL: usize = 0x00000002; +const HWCAP2_SHA1: usize = 0x00000004; +const HWCAP2_SHA2: usize = 0x00000008; +const HWCAP2_CRC32: usize = 0x00000010; + +/// Try to read the features from the auxiliary vector +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, auxv.hwcap & HWCAP_NEON != 0); + enable_feature(&mut value, Feature::pmull, auxv.hwcap2 & HWCAP2_PMULL != 0); + enable_feature(&mut value, Feature::crc, auxv.hwcap2 & HWCAP2_CRC32 != 0); + enable_feature(&mut value, Feature::aes, auxv.hwcap2 & HWCAP2_AES != 0); + // SHA2 requires SHA1 & SHA2 features + let sha1 = auxv.hwcap2 & HWCAP2_SHA1 != 0; + let sha2 = auxv.hwcap2 & HWCAP2_SHA2 != 0; + enable_feature(&mut value, Feature::sha2, sha1 && sha2); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs new file mode 100644 index 00000000000..4e72bf22d76 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs @@ -0,0 +1,66 @@ +//! Parses ELF auxiliary vectors. +#![cfg_attr( + any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "riscv64" + ), + allow(dead_code) +)] + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. 
+#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For FreeBSD, they are +/// defined in [sys/elf_common.h][elf_common_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// [elf_common_h]: https://svnweb.freebsd.org/base/release/12.0.0/sys/sys/elf_common.h?revision=341707 +pub(crate) fn auxv() -> Result<AuxVec, ()> { + let hwcap = archauxv(libc::AT_HWCAP); + let hwcap2 = archauxv(libc::AT_HWCAP2); + // Zero could indicate that no features were detected, but it's also used to + // indicate an error. In particular, on many platforms AT_HWCAP2 will be + // legitimately zero, since it contains the most recent feature flags. + if hwcap != 0 || hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + Err(()) +} + +/// Tries to read the `key` from the auxiliary vector. +fn archauxv(key: libc::c_int) -> usize { + const OUT_LEN: libc::c_int = core::mem::size_of::<libc::c_ulong>() as libc::c_int; + let mut out: libc::c_ulong = 0; + unsafe { + // elf_aux_info is available on FreeBSD 12.0+ and 11.4+: + // https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 + // https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h + // FreeBSD 11 support in std has been removed in Rust 1.75 (https://github.com/rust-lang/rust/pull/114521), + // so we can safely use this function. + let res = libc::elf_aux_info( + key, + &mut out as *mut libc::c_ulong as *mut libc::c_void, + OUT_LEN, + ); + // If elf_aux_info fails, `out` will be left at zero (which is the proper default value). 
+ debug_assert!(res == 0 || out == 0); + } + out as usize +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs new file mode 100644 index 00000000000..ade7fb6269d --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs @@ -0,0 +1,22 @@ +//! Run-time feature detection on FreeBSD + +mod auxvec; + +cfg_if::cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub(crate) use self::aarch64::detect_features; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub(crate) use self::arm::detect_features; + } else if #[cfg(target_arch = "powerpc64")] { + mod powerpc; + pub(crate) use self::powerpc::detect_features; + } else { + use crate::detect::cache; + /// Performs run-time feature detection. + pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs new file mode 100644 index 00000000000..d03af68cd08 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs @@ -0,0 +1,21 @@ +//! Run-time feature detection for PowerPC on FreeBSD. 
+ +use super::auxvec; +use crate::detect::{Feature, cache}; + +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs new file mode 100644 index 00000000000..22a9cefff7b --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -0,0 +1,484 @@ +//! Run-time feature detection for Aarch64 on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + #[cfg(target_os = "android")] + let is_exynos9810 = { + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // https://reviews.llvm.org/D114523 + let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize]; + let len = unsafe { + libc::__system_property_get(c"ro.arch".as_ptr(), arch.as_mut_ptr() as *mut libc::c_char) + }; + // On Exynos, ro.arch is not available on Android 12+, but it is fine + // because Android 9+ includes the fix. 
+ len > 0 && arch.starts_with(b"exynos9810") + }; + #[cfg(not(target_os = "android"))] + let is_exynos9810 = false; + + if let Ok(auxv) = auxvec::auxv() { + let hwcap: AtHwcap = auxv.into(); + return hwcap.cache(is_exynos9810); + } + cache::Initializer::default() +} + +/// These values are part of the platform-specific [asm/hwcap.h][hwcap] . +/// +/// The names match those used for cpuinfo. +/// +/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +#[derive(Debug, Default, PartialEq)] +struct AtHwcap { + // AT_HWCAP + fp: bool, + asimd: bool, + // evtstrm: No LLVM support. + aes: bool, + pmull: bool, + sha1: bool, + sha2: bool, + crc32: bool, + atomics: bool, + fphp: bool, + asimdhp: bool, + // cpuid: No LLVM support. + asimdrdm: bool, + jscvt: bool, + fcma: bool, + lrcpc: bool, + dcpop: bool, + sha3: bool, + sm3: bool, + sm4: bool, + asimddp: bool, + sha512: bool, + sve: bool, + fhm: bool, + dit: bool, + uscat: bool, + ilrcpc: bool, + flagm: bool, + ssbs: bool, + sb: bool, + paca: bool, + pacg: bool, + + // AT_HWCAP2 + dcpodp: bool, + sve2: bool, + sveaes: bool, + svepmull: bool, + svebitperm: bool, + svesha3: bool, + svesm4: bool, + flagm2: bool, + frint: bool, + // svei8mm: See i8mm feature. + svef32mm: bool, + svef64mm: bool, + // svebf16: See bf16 feature. + i8mm: bool, + bf16: bool, + // dgh: No LLVM support. 
+ rng: bool, + bti: bool, + mte: bool, + ecv: bool, + // afp: bool, + // rpres: bool, + // mte3: bool, + sme: bool, + smei16i64: bool, + smef64f64: bool, + // smei8i32: bool, + // smef16f32: bool, + // smeb16f32: bool, + // smef32f32: bool, + smefa64: bool, + wfxt: bool, + // ebf16: bool, + // sveebf16: bool, + cssc: bool, + // rprfm: bool, + sve2p1: bool, + sme2: bool, + sme2p1: bool, + // smei16i32: bool, + // smebi32i32: bool, + smeb16b16: bool, + smef16f16: bool, + mops: bool, + hbc: bool, + sveb16b16: bool, + lrcpc3: bool, + lse128: bool, + fpmr: bool, + lut: bool, + faminmax: bool, + f8cvt: bool, + f8fma: bool, + f8dp4: bool, + f8dp2: bool, + f8e4m3: bool, + f8e5m2: bool, + smelutv2: bool, + smef8f16: bool, + smef8f32: bool, + smesf8fma: bool, + smesf8dp4: bool, + smesf8dp2: bool, + // pauthlr: bool, +} + +impl From<auxvec::AuxVec> for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. + fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + fp: bit::test(auxv.hwcap, 0), + asimd: bit::test(auxv.hwcap, 1), + // evtstrm: bit::test(auxv.hwcap, 2), + aes: bit::test(auxv.hwcap, 3), + pmull: bit::test(auxv.hwcap, 4), + sha1: bit::test(auxv.hwcap, 5), + sha2: bit::test(auxv.hwcap, 6), + crc32: bit::test(auxv.hwcap, 7), + atomics: bit::test(auxv.hwcap, 8), + fphp: bit::test(auxv.hwcap, 9), + asimdhp: bit::test(auxv.hwcap, 10), + // cpuid: bit::test(auxv.hwcap, 11), + asimdrdm: bit::test(auxv.hwcap, 12), + jscvt: bit::test(auxv.hwcap, 13), + fcma: bit::test(auxv.hwcap, 14), + lrcpc: bit::test(auxv.hwcap, 15), + dcpop: bit::test(auxv.hwcap, 16), + sha3: bit::test(auxv.hwcap, 17), + sm3: bit::test(auxv.hwcap, 18), + sm4: bit::test(auxv.hwcap, 19), + asimddp: bit::test(auxv.hwcap, 20), + sha512: bit::test(auxv.hwcap, 21), + sve: bit::test(auxv.hwcap, 22), + fhm: bit::test(auxv.hwcap, 23), + dit: bit::test(auxv.hwcap, 24), + uscat: bit::test(auxv.hwcap, 25), + ilrcpc: bit::test(auxv.hwcap, 26), + flagm: bit::test(auxv.hwcap, 27), + ssbs: bit::test(auxv.hwcap, 28), + 
sb: bit::test(auxv.hwcap, 29), + paca: bit::test(auxv.hwcap, 30), + pacg: bit::test(auxv.hwcap, 31), + + // AT_HWCAP2 + dcpodp: bit::test(auxv.hwcap2, 0), + sve2: bit::test(auxv.hwcap2, 1), + sveaes: bit::test(auxv.hwcap2, 2), + svepmull: bit::test(auxv.hwcap2, 3), + svebitperm: bit::test(auxv.hwcap2, 4), + svesha3: bit::test(auxv.hwcap2, 5), + svesm4: bit::test(auxv.hwcap2, 6), + flagm2: bit::test(auxv.hwcap2, 7), + frint: bit::test(auxv.hwcap2, 8), + // svei8mm: bit::test(auxv.hwcap2, 9), + svef32mm: bit::test(auxv.hwcap2, 10), + svef64mm: bit::test(auxv.hwcap2, 11), + // svebf16: bit::test(auxv.hwcap2, 12), + i8mm: bit::test(auxv.hwcap2, 13), + bf16: bit::test(auxv.hwcap2, 14), + // dgh: bit::test(auxv.hwcap2, 15), + rng: bit::test(auxv.hwcap2, 16), + bti: bit::test(auxv.hwcap2, 17), + mte: bit::test(auxv.hwcap2, 18), + ecv: bit::test(auxv.hwcap2, 19), + // afp: bit::test(auxv.hwcap2, 20), + // rpres: bit::test(auxv.hwcap2, 21), + // mte3: bit::test(auxv.hwcap2, 22), + sme: bit::test(auxv.hwcap2, 23), + smei16i64: bit::test(auxv.hwcap2, 24), + smef64f64: bit::test(auxv.hwcap2, 25), + // smei8i32: bit::test(auxv.hwcap2, 26), + // smef16f32: bit::test(auxv.hwcap2, 27), + // smeb16f32: bit::test(auxv.hwcap2, 28), + // smef32f32: bit::test(auxv.hwcap2, 29), + smefa64: bit::test(auxv.hwcap2, 30), + wfxt: bit::test(auxv.hwcap2, 31), + // ebf16: bit::test(auxv.hwcap2, 32), + // sveebf16: bit::test(auxv.hwcap2, 33), + cssc: bit::test(auxv.hwcap2, 34), + // rprfm: bit::test(auxv.hwcap2, 35), + sve2p1: bit::test(auxv.hwcap2, 36), + sme2: bit::test(auxv.hwcap2, 37), + sme2p1: bit::test(auxv.hwcap2, 38), + // smei16i32: bit::test(auxv.hwcap2, 39), + // smebi32i32: bit::test(auxv.hwcap2, 40), + smeb16b16: bit::test(auxv.hwcap2, 41), + smef16f16: bit::test(auxv.hwcap2, 42), + mops: bit::test(auxv.hwcap2, 43), + hbc: bit::test(auxv.hwcap2, 44), + sveb16b16: bit::test(auxv.hwcap2, 45), + lrcpc3: bit::test(auxv.hwcap2, 46), + lse128: bit::test(auxv.hwcap2, 47), + fpmr: 
bit::test(auxv.hwcap2, 48), + lut: bit::test(auxv.hwcap2, 49), + faminmax: bit::test(auxv.hwcap2, 50), + f8cvt: bit::test(auxv.hwcap2, 51), + f8fma: bit::test(auxv.hwcap2, 52), + f8dp4: bit::test(auxv.hwcap2, 53), + f8dp2: bit::test(auxv.hwcap2, 54), + f8e4m3: bit::test(auxv.hwcap2, 55), + f8e5m2: bit::test(auxv.hwcap2, 56), + smelutv2: bit::test(auxv.hwcap2, 57), + smef8f16: bit::test(auxv.hwcap2, 58), + smef8f32: bit::test(auxv.hwcap2, 59), + smesf8fma: bit::test(auxv.hwcap2, 60), + smesf8dp4: bit::test(auxv.hwcap2, 61), + smesf8dp2: bit::test(auxv.hwcap2, 62), + // pauthlr: bit::test(auxv.hwcap2, ??), + } + } +} + +impl AtHwcap { + /// Initializes the cache from the feature bits. + /// + /// The feature dependencies here come directly from LLVM's feature definitions: + /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td + fn cache(self, is_exynos9810: bool) -> cache::Initializer { + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // So, only check features that are known to be available on exynos-m3: + // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature + // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342. 
+ if is_exynos9810 { + enable_feature(Feature::fp, self.fp); + enable_feature(Feature::crc, self.crc32); + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // Cryptographic extensions require ASIMD + // AES also covers FEAT_PMULL + enable_feature(Feature::aes, self.aes && self.pmull && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + return value; + } + + enable_feature(Feature::fp, self.fp); + // Half-float support requires float support + enable_feature(Feature::fp16, self.fp && self.fphp); + // FHM (fp16fml in LLVM) requires half float support + enable_feature(Feature::fhm, self.fphp && self.fhm); + enable_feature(Feature::pmull, self.pmull); + enable_feature(Feature::crc, self.crc32); + enable_feature(Feature::lse, self.atomics); + enable_feature(Feature::lse2, self.uscat); + enable_feature(Feature::lse128, self.lse128 && self.atomics); + enable_feature(Feature::rcpc, self.lrcpc); + // RCPC2 (rcpc-immo in LLVM) requires RCPC support + let rcpc2 = self.ilrcpc && self.lrcpc; + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2); + enable_feature(Feature::dit, self.dit); + enable_feature(Feature::flagm, self.flagm); + enable_feature(Feature::flagm2, self.flagm2); + enable_feature(Feature::ssbs, self.ssbs); + enable_feature(Feature::sb, self.sb); + enable_feature(Feature::paca, self.paca); + enable_feature(Feature::pacg, self.pacg); + // enable_feature(Feature::pauth_lr, self.pauthlr); + enable_feature(Feature::dpb, self.dcpop); + enable_feature(Feature::dpb2, self.dcpodp); + enable_feature(Feature::rand, self.rng); + enable_feature(Feature::bti, self.bti); + enable_feature(Feature::mte, self.mte); + // jsconv requires float support + enable_feature(Feature::jsconv, self.jscvt && self.fp); + enable_feature(Feature::rdm, 
self.asimdrdm); + enable_feature(Feature::dotprod, self.asimddp); + enable_feature(Feature::frintts, self.frint); + + // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes + // separately. We ignore that distinction here. + enable_feature(Feature::i8mm, self.i8mm); + enable_feature(Feature::bf16, self.bf16); + + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // ASIMD extensions require ASIMD support: + enable_feature(Feature::fcma, self.fcma && asimd); + enable_feature(Feature::sve, self.sve && asimd); + + // SVE extensions require SVE & ASIMD + enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd); + enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd); + + // Cryptographic extensions require ASIMD + enable_feature(Feature::aes, self.aes && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + // SHA512/SHA3 require SHA1 & SHA256 + enable_feature( + Feature::sha3, + self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd, + ); + enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd); + + // SVE2 requires SVE + let sve2 = self.sve2 && self.sve && asimd; + enable_feature(Feature::sve2, sve2); + enable_feature(Feature::sve2p1, self.sve2p1 && sve2); + // SVE2 extensions require SVE2 and crypto features + enable_feature( + Feature::sve2_aes, + self.sveaes && self.svepmull && sve2 && self.aes, + ); + enable_feature( + Feature::sve2_sm4, + self.svesm4 && sve2 && self.sm3 && self.sm4, + ); + enable_feature( + Feature::sve2_sha3, + self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2, + ); + enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2); + enable_feature(Feature::sve_b16b16, self.bf16 && self.sveb16b16); + enable_feature(Feature::hbc, self.hbc); + 
enable_feature(Feature::mops, self.mops); + enable_feature(Feature::ecv, self.ecv); + enable_feature(Feature::lut, self.lut); + enable_feature(Feature::cssc, self.cssc); + enable_feature(Feature::fpmr, self.fpmr); + enable_feature(Feature::faminmax, self.faminmax); + let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16; + enable_feature(Feature::fp8, fp8); + let fp8fma = self.f8fma && fp8; + enable_feature(Feature::fp8fma, fp8fma); + let fp8dot4 = self.f8dp4 && fp8fma; + enable_feature(Feature::fp8dot4, fp8dot4); + enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4); + enable_feature(Feature::wfxt, self.wfxt); + let sme = self.sme && self.bf16; + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme_i16i64, self.smei16i64 && sme); + enable_feature(Feature::sme_f64f64, self.smef64f64 && sme); + enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2); + let sme2 = self.sme2 && sme; + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme2p1, self.sme2p1 && sme2); + enable_feature( + Feature::sme_b16b16, + sme2 && self.bf16 && self.sveb16b16 && self.smeb16b16, + ); + enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2); + enable_feature(Feature::sme_lutv2, self.smelutv2); + let sme_f8f32 = self.smef8f32 && sme2 && fp8; + enable_feature(Feature::sme_f8f32, sme_f8f32); + enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32); + let ssve_fp8fma = self.smesf8fma && sme2 && fp8; + enable_feature(Feature::ssve_fp8fma, ssve_fp8fma); + let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma; + enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4); + enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4); + } + value + } +} + +#[cfg(target_endian = "little")] +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(feature = "std_detect_file_io")] + mod auxv_from_file { + use super::auxvec::auxv_from_file; + use super::*; + // The baseline hwcaps used in the (artificial) auxv test files. 
+ fn baseline_hwcaps() -> AtHwcap { + AtHwcap { + fp: true, + asimd: true, + aes: true, + pmull: true, + sha1: true, + sha2: true, + crc32: true, + atomics: true, + fphp: true, + asimdhp: true, + asimdrdm: true, + lrcpc: true, + dcpop: true, + asimddp: true, + ssbs: true, + ..AtHwcap::default() + } + } + + #[test] + fn linux_empty_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_no_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!( + AtHwcap::from(v), + AtHwcap { + // Some other HWCAP bits. + paca: true, + pacg: true, + // HWCAP2-only bits. + dcpodp: true, + frint: true, + rng: true, + bti: true, + mte: true, + ..baseline_hwcaps() + } + ); + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs new file mode 100644 index 00000000000..bbb173227d0 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs @@ -0,0 +1,34 @@ +//! Run-time feature detection for ARM on Linux. 
+ +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::i8mm, bit::test(auxv.hwcap, 27)); + enable_feature(&mut value, Feature::dotprod, bit::test(auxv.hwcap, 24)); + enable_feature(&mut value, Feature::neon, bit::test(auxv.hwcap, 12)); + enable_feature(&mut value, Feature::pmull, bit::test(auxv.hwcap2, 1)); + enable_feature(&mut value, Feature::crc, bit::test(auxv.hwcap2, 4)); + enable_feature(&mut value, Feature::aes, bit::test(auxv.hwcap2, 0)); + // SHA2 requires SHA1 & SHA2 features + enable_feature( + &mut value, + Feature::sha2, + bit::test(auxv.hwcap2, 2) && bit::test(auxv.hwcap2, 3), + ); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs new file mode 100644 index 00000000000..c30379ff065 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -0,0 +1,339 @@ +//! Parses ELF auxiliary vectors. +#![allow(dead_code)] + +pub(crate) const AT_NULL: usize = 0; + +/// Key to access the CPU Hardware capabilities bitfield. +pub(crate) const AT_HWCAP: usize = 16; +/// Key to access the CPU Hardware capabilities 2 bitfield. +#[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", +))] +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. 
+/// +/// If an entry cannot be read, all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub(crate) struct AuxVec { + pub hwcap: usize, + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For Linux, they are +/// defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// There is no perfect way of reading the auxiliary vector. +/// +/// - If the `std_detect_dlsym_getauxval` cargo feature is enabled, this will use +/// `getauxval` if it is linked to the binary, and otherwise proceed to a fallback implementation. +/// When `std_detect_dlsym_getauxval` is disabled, this will assume that `getauxval` is +/// linked to the binary - if that is not the case the behavior is undefined. +/// - Otherwise, if the `std_detect_file_io` cargo feature is enabled, it will +/// try to read `/proc/self/auxv`. +/// - If that fails, this function returns an error. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// Note: The `std_detect_dlsym_getauxval` cargo feature is ignored on +/// `*-linux-{gnu,musl,ohos}*` and `*-android*` targets because we can safely assume `getauxval` +/// is linked to the binary. 
+/// - `*-linux-gnu*` targets ([since Rust 1.64](https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html)) +/// have glibc requirements higher than [glibc 2.16 that added `getauxval`](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html). +/// - `*-linux-musl*` targets ([at least since Rust 1.15](https://github.com/rust-lang/rust/blob/1.15.0/src/ci/docker/x86_64-musl/build-musl.sh#L15)) +/// use musl newer than [musl 1.1.0 that added `getauxval`](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197) +/// - `*-linux-ohos*` targets use a [fork of musl 1.2](https://gitee.com/openharmony/docs/blob/master/en/application-dev/reference/native-lib/musl.md) +/// - `*-android*` targets ([since Rust 1.68](https://blog.rust-lang.org/2023/01/09/android-ndk-update-r25.html)) +/// have the minimum supported API level higher than [Android 4.3 (API level 18) that added `getauxval`](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49). +/// +/// For more information about when `getauxval` is available check the great +/// [`auxv` crate documentation][auxv_docs]. +/// +/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h +/// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/ +pub(crate) fn auxv() -> Result<AuxVec, ()> { + // Try to call a getauxval function. + if let Ok(hwcap) = getauxval(AT_HWCAP) { + // Targets with only AT_HWCAP: + #[cfg(any( + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "mips", + target_arch = "mips64", + target_arch = "loongarch64", + ))] + { + // Zero could indicate that no features were detected, but it's also used to indicate + // an error. In either case, try the fallback. 
+ if hwcap != 0 { + return Ok(AuxVec { hwcap }); + } + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + // Zero could indicate that no features were detected, but it's also used to indicate + // an error. In particular, on many platforms AT_HWCAP2 will be legitimately zero, + // since it contains the most recent feature flags. Use the fallback only if no + // features were detected at all. + if hwcap != 0 || hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + + // Intentionally not used + let _ = hwcap; + } + + #[cfg(feature = "std_detect_file_io")] + { + // If calling getauxval fails, try to read the auxiliary vector from + // its file: + auxv_from_file("/proc/self/auxv") + } + #[cfg(not(feature = "std_detect_file_io"))] + { + Err(()) + } +} + +/// Tries to read the `key` from the auxiliary vector by calling the +/// `getauxval` function. If the function is not linked, this function returns `Err`. +fn getauxval(key: usize) -> Result<usize, ()> { + type F = unsafe extern "C" fn(libc::c_ulong) -> libc::c_ulong; + cfg_if::cfg_if! { + if #[cfg(all( + feature = "std_detect_dlsym_getauxval", + not(all( + target_os = "linux", + any(target_env = "gnu", target_env = "musl", target_env = "ohos"), + )), + not(target_os = "android"), + ))] { + let ffi_getauxval: F = unsafe { + let ptr = libc::dlsym(libc::RTLD_DEFAULT, c"getauxval".as_ptr()); + if ptr.is_null() { + return Err(()); + } + core::mem::transmute(ptr) + }; + } else { + let ffi_getauxval: F = libc::getauxval; + } + } + Ok(unsafe { ffi_getauxval(key as libc::c_ulong) as usize }) +} + +/// Tries to read the auxiliary vector from the `file`. If this fails, this +/// function returns `Err`. 
+#[cfg(feature = "std_detect_file_io")] +pub(super) fn auxv_from_file(file: &str) -> Result<AuxVec, ()> { + let file = super::read_file(file)?; + + // See <https://github.com/torvalds/linux/blob/v5.15/include/uapi/linux/auxvec.h>. + // + // The auxiliary vector contains at most 34 (key,value) fields: from + // `AT_MINSIGSTKSZ` to `AT_NULL`, but its number may increase. + let len = file.len(); + let mut buf = alloc::vec![0_usize; 1 + len / core::mem::size_of::<usize>()]; + unsafe { + core::ptr::copy_nonoverlapping(file.as_ptr(), buf.as_mut_ptr() as *mut u8, len); + } + + auxv_from_buf(&buf) +} + +/// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this +/// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] +fn auxv_from_buf(buf: &[usize]) -> Result<AuxVec, ()> { + // Targets with only AT_HWCAP: + #[cfg(any( + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "mips", + target_arch = "mips64", + target_arch = "loongarch64", + ))] + { + for el in buf.chunks(2) { + match el[0] { + AT_NULL => break, + AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }), + _ => (), + } + } + } + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + let mut hwcap = None; + // For some platforms, AT_HWCAP2 was added recently, so let it default to zero. + let mut hwcap2 = 0; + for el in buf.chunks(2) { + match el[0] { + AT_NULL => break, + AT_HWCAP => hwcap = Some(el[1]), + AT_HWCAP2 => hwcap2 = el[1], + _ => (), + } + } + + if let Some(hwcap) = hwcap { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + // Suppress unused variable + let _ = buf; + Err(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + // FIXME: on mips/mips64 getauxval returns 0, and /proc/self/auxv + // does not always contain the AT_HWCAP key under qemu. 
+ #[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + #[test] + fn auxv_crate() { + let v = auxv(); + if let Ok(hwcap) = getauxval(AT_HWCAP) { + let rt_hwcap = v.expect("failed to find hwcap key").hwcap; + assert_eq!(rt_hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + let rt_hwcap2 = v.expect("failed to find hwcap2 key").hwcap2; + assert_eq!(rt_hwcap2, hwcap2); + } + } + } + + #[test] + fn auxv_dump() { + if let Ok(auxvec) = auxv() { + println!("{:?}", auxvec); + } else { + println!("both getauxval() and reading /proc/self/auxv failed!"); + } + } + + #[cfg(feature = "std_detect_file_io")] + cfg_if::cfg_if! { + if #[cfg(target_arch = "arm")] { + #[test] + fn linux_rpi3() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-rpi3.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 4174038); + assert_eq!(v.hwcap2, 16); + } + + #[test] + fn linux_macos_vb() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv"); + println!("file: {file}"); + // The file contains HWCAP but not HWCAP2. In that case, we treat HWCAP2 as zero. 
+ let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 126614527); + assert_eq!(v.hwcap2, 0); + } + } else if #[cfg(target_arch = "aarch64")] { + #[cfg(target_endian = "little")] + #[test] + fn linux_artificial_aarch64() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-artificial-aarch64.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 0x0123456789abcdef); + assert_eq!(v.hwcap2, 0x02468ace13579bdf); + } + #[cfg(target_endian = "little")] + #[test] + fn linux_no_hwcap2_aarch64() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + // An absent HWCAP2 is treated as zero, and does not prevent acceptance of HWCAP. + assert_ne!(v.hwcap, 0); + assert_eq!(v.hwcap2, 0); + } + } + } + + #[test] + #[cfg(feature = "std_detect_file_io")] + fn auxv_dump_procfs() { + if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") { + println!("{:?}", auxvec); + } else { + println!("reading /proc/self/auxv failed!"); + } + } + + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + #[test] + #[cfg(feature = "std_detect_file_io")] + fn auxv_crate_procfs() { + if let Ok(procfs_auxv) = auxv_from_file("/proc/self/auxv") { + assert_eq!(auxv().unwrap(), procfs_auxv); + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/loongarch.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/loongarch.rs new file mode 100644 index 00000000000..14cc7a73183 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/loongarch.rs @@ -0,0 +1,68 @@ +//! Run-time feature detection for LoongArch on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; +use core::arch::asm; + +/// Try to read the features from the auxiliary vector. 
+pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, feature, enable| { + if enable { + value.set(feature as u32); + } + }; + + // The values are part of the platform-specific [cpucfg] + // + // [cpucfg]: LoongArch Reference Manual Volume 1: Basic Architecture v1.1 + let cpucfg2: usize; + unsafe { + asm!( + "cpucfg {}, {}", + out(reg) cpucfg2, in(reg) 2, + options(pure, nomem, preserves_flags, nostack) + ); + } + let cpucfg3: usize; + unsafe { + asm!( + "cpucfg {}, {}", + out(reg) cpucfg3, in(reg) 3, + options(pure, nomem, preserves_flags, nostack) + ); + } + enable_feature(&mut value, Feature::frecipe, bit::test(cpucfg2, 25)); + enable_feature(&mut value, Feature::div32, bit::test(cpucfg2, 26)); + enable_feature(&mut value, Feature::lam_bh, bit::test(cpucfg2, 27)); + enable_feature(&mut value, Feature::lamcas, bit::test(cpucfg2, 28)); + enable_feature(&mut value, Feature::scq, bit::test(cpucfg2, 30)); + enable_feature(&mut value, Feature::ld_seq_sa, bit::test(cpucfg3, 23)); + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/loongarch/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature( + &mut value, + Feature::f, + bit::test(cpucfg2, 1) && bit::test(auxv.hwcap, 3), + ); + enable_feature( + &mut value, + Feature::d, + bit::test(cpucfg2, 2) && bit::test(auxv.hwcap, 3), + ); + enable_feature(&mut value, Feature::lsx, bit::test(auxv.hwcap, 4)); + enable_feature(&mut value, Feature::lasx, bit::test(auxv.hwcap, 5)); + enable_feature( + &mut value, + Feature::lbt, + bit::test(auxv.hwcap, 10) && bit::test(auxv.hwcap, 11) && bit::test(auxv.hwcap, 12), + ); + enable_feature(&mut value, Feature::lvz, bit::test(auxv.hwcap, 9)); + enable_feature(&mut value, Feature::ual, bit::test(auxv.hwcap, 2)); + return value; + } + value +} diff --git 
a/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs new file mode 100644 index 00000000000..0cfa8869887 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs @@ -0,0 +1,23 @@ +//! Run-time feature detection for MIPS on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/mips/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1)); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs new file mode 100644 index 00000000000..8c689d0b1f0 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs @@ -0,0 +1,67 @@ +//! Run-time feature detection on Linux +//! 
+#[cfg(feature = "std_detect_file_io")] +use alloc::vec::Vec; + +mod auxvec; + +#[cfg(feature = "std_detect_file_io")] +fn read_file(path: &str) -> Result<Vec<u8>, ()> { + let mut path = Vec::from(path.as_bytes()); + path.push(0); + + unsafe { + let file = libc::open(path.as_ptr() as *const libc::c_char, libc::O_RDONLY); + if file == -1 { + return Err(()); + } + + let mut data = Vec::new(); + loop { + data.reserve(4096); + let spare = data.spare_capacity_mut(); + match libc::read(file, spare.as_mut_ptr() as *mut _, spare.len()) { + -1 => { + libc::close(file); + return Err(()); + } + 0 => break, + n => data.set_len(data.len() + n as usize), + } + } + + libc::close(file); + Ok(data) + } +} + +cfg_if::cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub(crate) use self::aarch64::detect_features; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub(crate) use self::arm::detect_features; + } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] { + mod riscv; + pub(crate) use self::riscv::detect_features; + } else if #[cfg(any(target_arch = "mips", target_arch = "mips64"))] { + mod mips; + pub(crate) use self::mips::detect_features; + } else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] { + mod powerpc; + pub(crate) use self::powerpc::detect_features; + } else if #[cfg(target_arch = "loongarch64")] { + mod loongarch; + pub(crate) use self::loongarch::detect_features; + } else if #[cfg(target_arch = "s390x")] { + mod s390x; + pub(crate) use self::s390x::detect_features; + } else { + use crate::detect::cache; + /// Performs run-time feature detection. 
+ pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs new file mode 100644 index 00000000000..6a4f7e715d9 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs @@ -0,0 +1,35 @@ +//! Run-time feature detection for PowerPC on Linux. + +use super::auxvec; +use crate::detect::{Feature, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/cputable.h][cputable] + // + // [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h + if let Ok(auxv) = auxvec::auxv() { + // note: the PowerPC values are the mask to do the test (instead of the + // index of the bit to test like in ARM and Aarch64) + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + let power8_features = auxv.hwcap2 & 0x80000000 != 0; + enable_feature(&mut value, Feature::power8, power8_features); + enable_feature(&mut value, Feature::power8_altivec, power8_features); + enable_feature(&mut value, Feature::power8_crypto, power8_features); + enable_feature(&mut value, Feature::power8_vector, power8_features); + let power9_features = auxv.hwcap2 & 0x00800000 != 0; + enable_feature(&mut value, Feature::power9, power9_features); + enable_feature(&mut value, Feature::power9_altivec, power9_features); + enable_feature(&mut value, Feature::power9_vector, power9_features); + return value; + } + value +} diff --git 
// a/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs
// new file mode 100644
// index 00000000000..5506ff31fc7
// --- /dev/null
// +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs
// @@ -0,0 +1,330 @@
//! Run-time feature detection for RISC-V on Linux.
//!
//! On RISC-V, detection using auxv only supports single-letter extensions.
//! So, we use riscv_hwprobe that supports multi-letter extensions if available.
//! <https://www.kernel.org/doc/html/latest/arch/riscv/hwprobe.html>

use core::ptr;

use super::super::riscv::imply_features;
use super::auxvec;
use crate::detect::{Feature, bit, cache};

// See <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/prctl.h?h=v6.15>
// for runtime status query constants.
const PR_RISCV_V_GET_CONTROL: libc::c_int = 70;
const PR_RISCV_V_VSTATE_CTRL_ON: libc::c_int = 2;
const PR_RISCV_V_VSTATE_CTRL_CUR_MASK: libc::c_int = 3;

// See <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/uapi/asm/hwprobe.h?h=v6.15>
// for riscv_hwprobe struct and hardware probing constants.

/// One key/value pair exchanged with the `riscv_hwprobe` system call.
#[repr(C)]
struct riscv_hwprobe {
    /// Which property is being queried (one of the `RISCV_HWPROBE_KEY_*` values).
    key: i64,
    /// The value reported for `key`.
    value: u64,
}

// System call number used to invoke `riscv_hwprobe` through `libc::syscall`.
#[allow(non_upper_case_globals)]
const __NR_riscv_hwprobe: libc::c_long = 258;

const RISCV_HWPROBE_KEY_BASE_BEHAVIOR: i64 = 3;
const RISCV_HWPROBE_BASE_BEHAVIOR_IMA: u64 = 1 << 0;

// Bit masks within the value reported for `RISCV_HWPROBE_KEY_IMA_EXT_0`.
const RISCV_HWPROBE_KEY_IMA_EXT_0: i64 = 4;
const RISCV_HWPROBE_IMA_FD: u64 = 1 << 0;
const RISCV_HWPROBE_IMA_C: u64 = 1 << 1;
const RISCV_HWPROBE_IMA_V: u64 = 1 << 2;
const RISCV_HWPROBE_EXT_ZBA: u64 = 1 << 3;
const RISCV_HWPROBE_EXT_ZBB: u64 = 1 << 4;
const RISCV_HWPROBE_EXT_ZBS: u64 = 1 << 5;
const RISCV_HWPROBE_EXT_ZICBOZ: u64 = 1 << 6;
const RISCV_HWPROBE_EXT_ZBC: u64 = 1 << 7;
const RISCV_HWPROBE_EXT_ZBKB: u64 = 1 << 8;
const RISCV_HWPROBE_EXT_ZBKC: u64 = 1 << 9;
const RISCV_HWPROBE_EXT_ZBKX: u64 = 1 << 10;
const RISCV_HWPROBE_EXT_ZKND: u64 = 1 << 11;
const RISCV_HWPROBE_EXT_ZKNE: u64 = 1 << 12;
const RISCV_HWPROBE_EXT_ZKNH: u64 = 1 << 13;
const RISCV_HWPROBE_EXT_ZKSED: u64 = 1 << 14;
const RISCV_HWPROBE_EXT_ZKSH: u64 = 1 << 15;
const RISCV_HWPROBE_EXT_ZKT: u64 = 1 << 16;
const RISCV_HWPROBE_EXT_ZVBB: u64 = 1 << 17;
const RISCV_HWPROBE_EXT_ZVBC: u64 = 1 << 18;
const RISCV_HWPROBE_EXT_ZVKB: u64 = 1 << 19;
const RISCV_HWPROBE_EXT_ZVKG: u64 = 1 << 20;
const RISCV_HWPROBE_EXT_ZVKNED: u64 = 1 << 21;
const RISCV_HWPROBE_EXT_ZVKNHA: u64 = 1 << 22;
const RISCV_HWPROBE_EXT_ZVKNHB: u64 = 1 << 23;
const RISCV_HWPROBE_EXT_ZVKSED: u64 = 1 << 24;
const RISCV_HWPROBE_EXT_ZVKSH: u64 = 1 << 25;
const RISCV_HWPROBE_EXT_ZVKT: u64 = 1 << 26;
const RISCV_HWPROBE_EXT_ZFH: u64 = 1 << 27;
const RISCV_HWPROBE_EXT_ZFHMIN: u64 = 1 << 28;
const RISCV_HWPROBE_EXT_ZIHINTNTL: u64 = 1 << 29;
const RISCV_HWPROBE_EXT_ZVFH: u64 = 1 << 30;
const RISCV_HWPROBE_EXT_ZVFHMIN: u64 = 1 << 31;
const RISCV_HWPROBE_EXT_ZFA: u64 = 1 << 32;
const RISCV_HWPROBE_EXT_ZTSO: u64 = 1 << 33;
const RISCV_HWPROBE_EXT_ZACAS: u64 = 1 << 34;
const RISCV_HWPROBE_EXT_ZICOND: u64 = 1 << 35;
const RISCV_HWPROBE_EXT_ZIHINTPAUSE: u64 = 1 << 36;
const RISCV_HWPROBE_EXT_ZVE32X: u64 = 1 << 37;
const RISCV_HWPROBE_EXT_ZVE32F: u64 = 1 << 38;
const RISCV_HWPROBE_EXT_ZVE64X: u64 = 1 << 39;
const RISCV_HWPROBE_EXT_ZVE64F: u64 = 1 << 40;
const RISCV_HWPROBE_EXT_ZVE64D: u64 = 1 << 41;
const RISCV_HWPROBE_EXT_ZIMOP: u64 = 1 << 42;
const RISCV_HWPROBE_EXT_ZCA: u64 = 1 << 43;
const RISCV_HWPROBE_EXT_ZCB: u64 = 1 << 44;
const RISCV_HWPROBE_EXT_ZCD: u64 = 1 << 45;
const RISCV_HWPROBE_EXT_ZCF: u64 = 1 << 46;
const RISCV_HWPROBE_EXT_ZCMOP: u64 = 1 << 47;
const RISCV_HWPROBE_EXT_ZAWRS: u64 = 1 << 48;
// Excluded because it only reports the existence of `prctl`-based pointer masking control.
// const RISCV_HWPROBE_EXT_SUPM: u64 = 1 << 49;
const RISCV_HWPROBE_EXT_ZICNTR: u64 = 1 << 50;
const RISCV_HWPROBE_EXT_ZIHPM: u64 = 1 << 51;
const RISCV_HWPROBE_EXT_ZFBFMIN: u64 = 1 << 52;
const RISCV_HWPROBE_EXT_ZVFBFMIN: u64 = 1 << 53;
const RISCV_HWPROBE_EXT_ZVFBFWMA: u64 = 1 << 54;
const RISCV_HWPROBE_EXT_ZICBOM: u64 = 1 << 55;
const RISCV_HWPROBE_EXT_ZAAMO: u64 = 1 << 56;
const RISCV_HWPROBE_EXT_ZALRSC: u64 = 1 << 57;

const RISCV_HWPROBE_KEY_CPUPERF_0: i64 = 5;
const RISCV_HWPROBE_MISALIGNED_FAST: u64 = 3;
const RISCV_HWPROBE_MISALIGNED_MASK: u64 = 7;

const RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: i64 = 9;
const RISCV_HWPROBE_MISALIGNED_SCALAR_FAST: u64 = 3;

const RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: i64 = 10;
const RISCV_HWPROBE_MISALIGNED_VECTOR_FAST: u64 = 3;

// syscall returns an unsupported error if riscv_hwprobe is not supported,
// so we can safely use this function on older versions of Linux.
fn _riscv_hwprobe(out: &mut [riscv_hwprobe]) -> bool {
    // Thin typed wrapper around the raw, variadic `libc::syscall`.
    unsafe fn __riscv_hwprobe(
        pairs: *mut riscv_hwprobe,
        pair_count: libc::size_t,
        cpu_set_size: libc::size_t,
        cpus: *mut libc::c_ulong,
        flags: libc::c_uint,
    ) -> libc::c_long {
        unsafe {
            libc::syscall(
                __NR_riscv_hwprobe,
                pairs,
                pair_count,
                cpu_set_size,
                cpus,
                flags,
            )
        }
    }

    let len = out.len();
    // Passes a zero-sized, null CPU set and no flags; a return value of zero
    // is reported to the caller as success.
    unsafe { __riscv_hwprobe(out.as_mut_ptr(), len, 0, ptr::null_mut(), 0) == 0 }
}

/// Read list of supported features from (1) the auxiliary vector
/// and (2) the results of `riscv_hwprobe` and `prctl` system calls.
///
/// # Panics
///
/// Panics if the auxiliary vector cannot be read.
pub(crate) fn detect_features() -> cache::Initializer {
    let mut value = cache::Initializer::default();
    let mut enable_feature = |feature, enable| {
        if enable {
            value.set(feature as u32);
        }
    };

    // Use auxiliary vector to enable single-letter ISA extensions.
    // The values are part of the platform-specific [asm/hwcap.h][hwcap]
    //
    // [hwcap]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/uapi/asm/hwcap.h?h=v6.15
    let auxv = auxvec::auxv().expect("read auxvec"); // should not fail on RISC-V platform
    // Each single-letter extension is tested at bit index `letter - 'a'`.
    let mut has_i = bit::test(auxv.hwcap, (b'i' - b'a').into());
    #[allow(clippy::eq_op)]
    enable_feature(Feature::a, bit::test(auxv.hwcap, (b'a' - b'a').into()));
    enable_feature(Feature::c, bit::test(auxv.hwcap, (b'c' - b'a').into()));
    enable_feature(Feature::d, bit::test(auxv.hwcap, (b'd' - b'a').into()));
    enable_feature(Feature::f, bit::test(auxv.hwcap, (b'f' - b'a').into()));
    enable_feature(Feature::m, bit::test(auxv.hwcap, (b'm' - b'a').into()));
    let has_v = bit::test(auxv.hwcap, (b'v' - b'a').into());
    // Becomes true only when the hwprobe block below runs to completion.
    let mut is_v_set = false;

    // Use riscv_hwprobe syscall to query more extensions and
    // performance-related capabilities.
    'hwprobe: {
        // NOTE: the `out[N]` indices used below refer to the positions in this array.
        let mut out = [
            riscv_hwprobe {
                key: RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
                value: 0,
            },
            riscv_hwprobe {
                key: RISCV_HWPROBE_KEY_IMA_EXT_0,
                value: 0,
            },
            riscv_hwprobe {
                key: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF,
                value: 0,
            },
            riscv_hwprobe {
                key: RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
                value: 0,
            },
            riscv_hwprobe {
                key: RISCV_HWPROBE_KEY_CPUPERF_0,
                value: 0,
            },
        ];
        if !_riscv_hwprobe(&mut out) {
            break 'hwprobe;
        }

        // Query scalar/vector misaligned behavior.
        // (An entry whose key reads back as -1 is skipped.)
        if out[2].key != -1 {
            enable_feature(
                Feature::unaligned_scalar_mem,
                out[2].value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST,
            );
        } else if out[4].key != -1 {
            // Deprecated method for fallback
            enable_feature(
                Feature::unaligned_scalar_mem,
                out[4].value & RISCV_HWPROBE_MISALIGNED_MASK == RISCV_HWPROBE_MISALIGNED_FAST,
            );
        }
        if out[3].key != -1 {
            enable_feature(
                Feature::unaligned_vector_mem,
                out[3].value == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST,
            );
        }

        // Query whether "I" base and extensions "M" and "A" (as in the ISA
        // manual version 2.2) are enabled. "I" base at that time corresponds
        // to "I", "Zicsr", "Zicntr" and "Zifencei" (as in the ISA manual version
        // 20240411).
        // This is a current requirement of
        // `RISCV_HWPROBE_KEY_IMA_EXT_0`-based tests.
        let has_ima = (out[0].key != -1) && (out[0].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA != 0);
        if !has_ima {
            break 'hwprobe;
        }
        has_i |= has_ima;
        enable_feature(Feature::zicsr, has_ima);
        enable_feature(Feature::zicntr, has_ima);
        enable_feature(Feature::zifencei, has_ima);
        enable_feature(Feature::m, has_ima);
        enable_feature(Feature::a, has_ima);

        // Enable features based on `RISCV_HWPROBE_KEY_IMA_EXT_0`.
        if out[1].key == -1 {
            break 'hwprobe;
        }
        let ima_ext_0 = out[1].value;
        let test = |mask| (ima_ext_0 & mask) != 0;

        enable_feature(Feature::d, test(RISCV_HWPROBE_IMA_FD)); // F is implied.
        enable_feature(Feature::c, test(RISCV_HWPROBE_IMA_C));

        enable_feature(Feature::zicntr, test(RISCV_HWPROBE_EXT_ZICNTR));
        enable_feature(Feature::zihpm, test(RISCV_HWPROBE_EXT_ZIHPM));

        enable_feature(Feature::zihintntl, test(RISCV_HWPROBE_EXT_ZIHINTNTL));
        enable_feature(Feature::zihintpause, test(RISCV_HWPROBE_EXT_ZIHINTPAUSE));
        enable_feature(Feature::zimop, test(RISCV_HWPROBE_EXT_ZIMOP));
        enable_feature(Feature::zicbom, test(RISCV_HWPROBE_EXT_ZICBOM));
        enable_feature(Feature::zicboz, test(RISCV_HWPROBE_EXT_ZICBOZ));
        enable_feature(Feature::zicond, test(RISCV_HWPROBE_EXT_ZICOND));

        enable_feature(Feature::zalrsc, test(RISCV_HWPROBE_EXT_ZALRSC));
        enable_feature(Feature::zaamo, test(RISCV_HWPROBE_EXT_ZAAMO));
        enable_feature(Feature::zawrs, test(RISCV_HWPROBE_EXT_ZAWRS));
        enable_feature(Feature::zacas, test(RISCV_HWPROBE_EXT_ZACAS));
        enable_feature(Feature::ztso, test(RISCV_HWPROBE_EXT_ZTSO));

        enable_feature(Feature::zba, test(RISCV_HWPROBE_EXT_ZBA));
        enable_feature(Feature::zbb, test(RISCV_HWPROBE_EXT_ZBB));
        enable_feature(Feature::zbs, test(RISCV_HWPROBE_EXT_ZBS));
        enable_feature(Feature::zbc, test(RISCV_HWPROBE_EXT_ZBC));

        enable_feature(Feature::zbkb, test(RISCV_HWPROBE_EXT_ZBKB));
        enable_feature(Feature::zbkc, test(RISCV_HWPROBE_EXT_ZBKC));
        enable_feature(Feature::zbkx, test(RISCV_HWPROBE_EXT_ZBKX));
        enable_feature(Feature::zknd, test(RISCV_HWPROBE_EXT_ZKND));
        enable_feature(Feature::zkne, test(RISCV_HWPROBE_EXT_ZKNE));
        enable_feature(Feature::zknh, test(RISCV_HWPROBE_EXT_ZKNH));
        enable_feature(Feature::zksed, test(RISCV_HWPROBE_EXT_ZKSED));
        enable_feature(Feature::zksh, test(RISCV_HWPROBE_EXT_ZKSH));
        enable_feature(Feature::zkt, test(RISCV_HWPROBE_EXT_ZKT));

        enable_feature(Feature::zcmop, test(RISCV_HWPROBE_EXT_ZCMOP));
        enable_feature(Feature::zca, test(RISCV_HWPROBE_EXT_ZCA));
        enable_feature(Feature::zcf, test(RISCV_HWPROBE_EXT_ZCF));
        enable_feature(Feature::zcd, test(RISCV_HWPROBE_EXT_ZCD));
        enable_feature(Feature::zcb, test(RISCV_HWPROBE_EXT_ZCB));

        enable_feature(Feature::zfh, test(RISCV_HWPROBE_EXT_ZFH));
        enable_feature(Feature::zfhmin, test(RISCV_HWPROBE_EXT_ZFHMIN));
        enable_feature(Feature::zfa, test(RISCV_HWPROBE_EXT_ZFA));
        enable_feature(Feature::zfbfmin, test(RISCV_HWPROBE_EXT_ZFBFMIN));

        // Use prctl (if any) to determine whether the vector extension
        // is enabled on the current thread (assuming the entire process
        // share the same status). If prctl fails (e.g. QEMU userland emulator
        // as of version 9.2.3), use auxiliary vector to retrieve the default
        // vector status on the process startup.
        let has_vectors = {
            let v_status = unsafe { libc::prctl(PR_RISCV_V_GET_CONTROL) };
            if v_status >= 0 {
                (v_status & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) == PR_RISCV_V_VSTATE_CTRL_ON
            } else {
                has_v
            }
        };
        if has_vectors {
            enable_feature(Feature::v, test(RISCV_HWPROBE_IMA_V));
            enable_feature(Feature::zve32x, test(RISCV_HWPROBE_EXT_ZVE32X));
            enable_feature(Feature::zve32f, test(RISCV_HWPROBE_EXT_ZVE32F));
            enable_feature(Feature::zve64x, test(RISCV_HWPROBE_EXT_ZVE64X));
            enable_feature(Feature::zve64f, test(RISCV_HWPROBE_EXT_ZVE64F));
            enable_feature(Feature::zve64d, test(RISCV_HWPROBE_EXT_ZVE64D));

            enable_feature(Feature::zvbb, test(RISCV_HWPROBE_EXT_ZVBB));
            enable_feature(Feature::zvbc, test(RISCV_HWPROBE_EXT_ZVBC));
            enable_feature(Feature::zvkb, test(RISCV_HWPROBE_EXT_ZVKB));
            enable_feature(Feature::zvkg, test(RISCV_HWPROBE_EXT_ZVKG));
            enable_feature(Feature::zvkned, test(RISCV_HWPROBE_EXT_ZVKNED));
            enable_feature(Feature::zvknha, test(RISCV_HWPROBE_EXT_ZVKNHA));
            enable_feature(Feature::zvknhb, test(RISCV_HWPROBE_EXT_ZVKNHB));
            enable_feature(Feature::zvksed, test(RISCV_HWPROBE_EXT_ZVKSED));
            enable_feature(Feature::zvksh, test(RISCV_HWPROBE_EXT_ZVKSH));
            enable_feature(Feature::zvkt, test(RISCV_HWPROBE_EXT_ZVKT));

            enable_feature(Feature::zvfh, test(RISCV_HWPROBE_EXT_ZVFH));
            enable_feature(Feature::zvfhmin, test(RISCV_HWPROBE_EXT_ZVFHMIN));
            enable_feature(Feature::zvfbfmin, test(RISCV_HWPROBE_EXT_ZVFBFMIN));
            enable_feature(Feature::zvfbfwma, test(RISCV_HWPROBE_EXT_ZVFBFWMA));
        }
        // Reached only when none of the early `break 'hwprobe` exits fired, so
        // the auxv-only fallback for `v` below is skipped.
        is_v_set = true;
    };

    // Set V purely depending on the auxiliary vector
    // only if no fine-grained vector extension detection is available.
    if !is_v_set {
        enable_feature(Feature::v, has_v);
    }

    // Handle base ISA.
    // If future RV128I is supported, implement with `enable_feature` here.
    // Note that we should use `target_arch` instead of `target_pointer_width`
    // to avoid misdetection caused by experimental ABIs such as RV64ILP32.
    #[cfg(target_arch = "riscv64")]
    enable_feature(Feature::rv64i, has_i);
    #[cfg(target_arch = "riscv32")]
    enable_feature(Feature::rv32i, has_i);

    imply_features(value)
}

// diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/s390x.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/s390x.rs
// new file mode 100644
// index 00000000000..9b53f526d61
// --- /dev/null
// +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/s390x.rs
// @@ -0,0 +1,152 @@
//! Run-time feature detection for s390x on Linux.

use super::auxvec;
use crate::detect::{Feature, bit, cache};

/// Try to read the features from the auxiliary vector
pub(crate) fn detect_features() -> cache::Initializer {
    // A missing auxiliary vector is tolerated: it becomes `None` and the
    // facility list is still consulted by `cache`.
    let opt_hwcap: Option<AtHwcap> = auxvec::auxv().ok().map(Into::into);
    let facilities = ExtendedFacilityList::new();
    cache(opt_hwcap, facilities)
}

/// Decoded form of the `AT_HWCAP` word: one boolean per tested bit.
#[derive(Debug, Default, PartialEq)]
struct AtHwcap {
    esan3: bool,
    zarch: bool,
    stfle: bool,
    msa: bool,
    ldisp: bool,
    eimm: bool,
    dfp: bool,
    hpage: bool,
    etf3eh: bool,
    high_gprs: bool,
    te: bool,
    vxrs: bool,
    vxrs_bcd: bool,
    vxrs_ext: bool,
    gs: bool,
    vxrs_ext2: bool,
    vxrs_pde: bool,
    sort: bool,
    dflt: bool,
    vxrs_pde2: bool,
    nnpa: bool,
    pci_mio: bool,
    sie: bool,
}

impl From<auxvec::AuxVec> for AtHwcap {
    /// Reads AtHwcap from the auxiliary vector.
    fn from(auxv: auxvec::AuxVec) -> Self {
        // Fields are filled from consecutive bit indices 0..=22 of `hwcap`.
        AtHwcap {
            esan3: bit::test(auxv.hwcap, 0),
            zarch: bit::test(auxv.hwcap, 1),
            stfle: bit::test(auxv.hwcap, 2),
            msa: bit::test(auxv.hwcap, 3),
            ldisp: bit::test(auxv.hwcap, 4),
            eimm: bit::test(auxv.hwcap, 5),
            dfp: bit::test(auxv.hwcap, 6),
            hpage: bit::test(auxv.hwcap, 7),
            etf3eh: bit::test(auxv.hwcap, 8),
            high_gprs: bit::test(auxv.hwcap, 9),
            te: bit::test(auxv.hwcap, 10),
            vxrs: bit::test(auxv.hwcap, 11),
            vxrs_bcd: bit::test(auxv.hwcap, 12),
            vxrs_ext: bit::test(auxv.hwcap, 13),
            gs: bit::test(auxv.hwcap, 14),
            vxrs_ext2: bit::test(auxv.hwcap, 15),
            vxrs_pde: bit::test(auxv.hwcap, 16),
            sort: bit::test(auxv.hwcap, 17),
            dflt: bit::test(auxv.hwcap, 18),
            vxrs_pde2: bit::test(auxv.hwcap, 19),
            nnpa: bit::test(auxv.hwcap, 20),
            pci_mio: bit::test(auxv.hwcap, 21),
            sie: bit::test(auxv.hwcap, 22),
        }
    }
}

/// Facility bits as stored by the `stfle` instruction: four 64-bit words.
struct ExtendedFacilityList([u64; 4]);

impl ExtendedFacilityList {
    /// Fills a zeroed four-word buffer by executing `stfle` on it.
    fn new() -> Self {
        let mut result: [u64; 4] = [0; 4];
        // SAFETY: rust/llvm only support s390x version with the `stfle` instruction.
        unsafe {
            core::arch::asm!(
                // equivalently ".insn s, 0xb2b00000, 0({1})",
                "stfle 0({})",
                in(reg_addr) result.as_mut_ptr() ,
                // r0 is loaded with the word count minus one; its output is discarded.
                inout("r0") result.len() as u64 - 1 => _,
                options(nostack)
            );
        }
        Self(result)
    }

    /// Returns whether facility bit `n` is set.
    ///
    /// Indexing panics if `n / 64` is outside the four stored words.
    const fn get_bit(&self, n: usize) -> bool {
        // NOTE: bits are numbered from the left.
        self.0[n / 64] & (1 << (63 - (n % 64))) != 0
    }
}

/// Initializes the cache from the feature bits.
///
/// These values are part of the platform-specific [asm/elf.h][kernel], and are a selection of the
/// fields found in the [Facility Indications].
///
/// [Facility Indications]: https://www.ibm.com/support/pages/sites/default/files/2021-05/SA22-7871-10.pdf#page=63
/// [kernel]: https://github.com/torvalds/linux/blob/b62cef9a5c673f1b8083159f5dc03c1c5daced2f/arch/s390/include/asm/elf.h#L129
fn cache(hwcap: Option<AtHwcap>, facilities: ExtendedFacilityList) -> cache::Initializer {
    let mut value = cache::Initializer::default();

    {
        // Sets feature `f` when facility bit `bit_index` is present.
        let mut enable_if_set = |bit_index, f| {
            if facilities.get_bit(bit_index) {
                value.set(f as u32);
            }
        };

        // We use HWCAP for `vector` because it requires both hardware and kernel support.
        if let Some(AtHwcap { vxrs: true, .. }) = hwcap {
            // vector and related

            enable_if_set(129, Feature::vector);

            enable_if_set(135, Feature::vector_enhancements_1);
            enable_if_set(148, Feature::vector_enhancements_2);
            enable_if_set(198, Feature::vector_enhancements_3);

            enable_if_set(134, Feature::vector_packed_decimal);
            enable_if_set(152, Feature::vector_packed_decimal_enhancement);
            enable_if_set(192, Feature::vector_packed_decimal_enhancement_2);
            enable_if_set(199, Feature::vector_packed_decimal_enhancement_3);

            enable_if_set(165, Feature::nnp_assist);
        }

        // others

        enable_if_set(76, Feature::message_security_assist_extension3);
        enable_if_set(77, Feature::message_security_assist_extension4);
        enable_if_set(57, Feature::message_security_assist_extension5);
        enable_if_set(146, Feature::message_security_assist_extension8);
        enable_if_set(155, Feature::message_security_assist_extension9);
        enable_if_set(86, Feature::message_security_assist_extension12);

        enable_if_set(58, Feature::miscellaneous_extensions_2);
        enable_if_set(61, Feature::miscellaneous_extensions_3);
        enable_if_set(84, Feature::miscellaneous_extensions_4);

        enable_if_set(45, Feature::high_word);
        enable_if_set(73, Feature::transactional_execution);
        enable_if_set(133, Feature::guarded_storage);
        enable_if_set(150, Feature::enhanced_sort);
        enable_if_set(151, Feature::deflate_conversion);
        enable_if_set(201, Feature::concurrent_functions);
    }

    value
}

// diff --git a/library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs
// new file mode 100644
// index 00000000000..cfe4ad10ad6
// --- /dev/null
// +++ b/library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs
// @@ -0,0 +1,55 @@
//! Run-time feature detection for Aarch64 on OpenBSD.
//!
//! OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl.
//!
https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 +//! https://github.com/golang/go/commit/cd54ef1f61945459486e9eea2f016d99ef1da925 + +use crate::detect::cache; +use core::{mem::MaybeUninit, ptr}; + +// Defined in machine/cpu.h. +// https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/arch/arm64/include/cpu.h#L25-L40 +const CPU_ID_AA64ISAR0: libc::c_int = 2; +const CPU_ID_AA64ISAR1: libc::c_int = 3; +const CPU_ID_AA64MMFR2: libc::c_int = 7; +const CPU_ID_AA64PFR0: libc::c_int = 8; + +/// Try to read the features from the system registers. +pub(crate) fn detect_features() -> cache::Initializer { + // ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 are supported on OpenBSD 7.1+. + // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 + // Others are supported on OpenBSD 7.3+. + // https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c + // sysctl returns an unsupported error if operation is not supported, + // so we can safely use this function on older versions of OpenBSD. + let aa64isar0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR0]).unwrap_or(0); + let aa64isar1 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR1]).unwrap_or(0); + let aa64mmfr2 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64MMFR2]).unwrap_or(0); + // Do not use unwrap_or(0) because in fp and asimd fields, 0 indicates that + // the feature is available. 
+ let aa64pfr0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64PFR0]); + + super::aarch64::parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, aa64pfr0) +} + +#[inline] +fn sysctl64(mib: &[libc::c_int]) -> Option<u64> { + const OUT_LEN: libc::size_t = core::mem::size_of::<u64>(); + let mut out = MaybeUninit::<u64>::uninit(); + let mut out_len = OUT_LEN; + let res = unsafe { + libc::sysctl( + mib.as_ptr(), + mib.len() as libc::c_uint, + out.as_mut_ptr() as *mut libc::c_void, + &mut out_len, + ptr::null_mut(), + 0, + ) + }; + if res == -1 || out_len != OUT_LEN { + return None; + } + // SAFETY: we've checked that sysctl was successful and `out` was filled. + Some(unsafe { out.assume_init() }) +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/other.rs b/library/stdarch/crates/std_detect/src/detect/os/other.rs new file mode 100644 index 00000000000..091fafc4ebf --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/other.rs @@ -0,0 +1,8 @@ +//! Other operating systems + +use crate::detect::cache; + +#[allow(dead_code)] +pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/riscv.rs b/library/stdarch/crates/std_detect/src/detect/os/riscv.rs new file mode 100644 index 00000000000..4c59ede8029 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/riscv.rs @@ -0,0 +1,203 @@ +//! Run-time feature detection utility for RISC-V. +//! +//! On RISC-V, full feature detection needs a help of one or more +//! feature detection mechanisms (usually provided by the operating system). +//! +//! RISC-V architecture defines many extensions and some have dependency to others. +//! More importantly, some of them cannot be enabled without resolving such +//! dependencies due to limited set of features that such mechanisms provide. +//! +//! This module provides an OS-independent utility to process such relations +//! between RISC-V extensions. 
+ +use crate::detect::{Feature, cache}; + +/// Imply features by the given set of enabled features. +/// +/// Note that it does not perform any consistency checks including existence of +/// conflicting extensions and/or complicated requirements. Eliminating such +/// inconsistencies is the responsibility of the feature detection logic and +/// its provider(s). +pub(crate) fn imply_features(mut value: cache::Initializer) -> cache::Initializer { + loop { + // Check convergence of the feature flags later. + let prev = value; + + // Expect that the optimizer turns repeated operations into + // a fewer number of bit-manipulation operations. + macro_rules! imply { + // Regular implication: + // A1 => (B1[, B2...]), A2 => (B1[, B2...]) and so on. + ($($from: ident)|+ => $($to: ident)&+) => { + if [$(Feature::$from as u32),+].iter().any(|&x| value.test(x)) { + $( + value.set(Feature::$to as u32); + )+ + } + }; + // Implication with multiple requirements: + // A1 && A2 ... => (B1[, B2...]). + ($($from: ident)&+ => $($to: ident)&+) => { + if [$(Feature::$from as u32),+].iter().all(|&x| value.test(x)) { + $( + value.set(Feature::$to as u32); + )+ + } + }; + } + macro_rules! group { + ($group: ident == $($member: ident)&+) => { + // Forward implication as defined in the specifications. + imply!($group => $($member)&+); + // Reverse implication to "group extension" from its members. + // This is not a part of specifications but convenient for + // feature detection and implemented in e.g. LLVM. + imply!($($member)&+ => $group); + }; + } + + /* + If a dependency/implication is not explicitly stated in the + specification, it is denoted as a comment as follows: + "defined as subset": + The latter extension is described as a subset of the former + (but the evidence is weak). + "functional": + The former extension is functionally a superset of the latter + (no direct references though). 
+ */ + + imply!(zvbb => zvkb); + + // Certain set of vector cryptography extensions form a group. + group!(zvkn == zvkned & zvknhb & zvkb & zvkt); + group!(zvknc == zvkn & zvbc); + group!(zvkng == zvkn & zvkg); + group!(zvks == zvksed & zvksh & zvkb & zvkt); + group!(zvksc == zvks & zvbc); + group!(zvksg == zvks & zvkg); + + imply!(zvknhb => zvknha); // functional + + // For vector cryptography, Zvknhb and Zvbc require integer arithmetic + // with EEW=64 (Zve64x) while others not depending on them + // require EEW=32 (Zve32x). + imply!(zvknhb | zvbc => zve64x); + imply!(zvbb | zvkb | zvkg | zvkned | zvknha | zvksed | zvksh => zve32x); + + imply!(zbc => zbkc); // defined as subset + group!(zkn == zbkb & zbkc & zbkx & zkne & zknd & zknh); + group!(zks == zbkb & zbkc & zbkx & zksed & zksh); + group!(zk == zkn & zkr & zkt); + + imply!(zacas => zaamo); + group!(a == zalrsc & zaamo); + + group!(b == zba & zbb & zbs); + + imply!(zcf => zca & f); + imply!(zcd => zca & d); + imply!(zcmop | zcb => zca); + + imply!(zhinx => zhinxmin); + imply!(zdinx | zhinxmin => zfinx); + + imply!(zvfh => zvfhmin); // functional + imply!(zvfh => zve32f & zfhmin); + imply!(zvfhmin => zve32f); + imply!(zvfbfwma => zvfbfmin & zfbfmin); + imply!(zvfbfmin => zve32f); + + imply!(v => zve64d); + imply!(zve64d => zve64f & d); + imply!(zve64f => zve64x & zve32f); + imply!(zve64x => zve32x); + imply!(zve32f => zve32x & f); + + imply!(zfh => zfhmin); + imply!(q => d); + imply!(d | zfhmin | zfa => f); + imply!(zfbfmin => f); // and some of (not all) "Zfh" instructions. + + // Relatively complex implication rules from the "C" extension. + imply!(c => zca); + imply!(c & d => zcd); + #[cfg(target_arch = "riscv32")] + imply!(c & f => zcf); + + imply!(zicntr | zihpm | f | zfinx | zve32x => zicsr); + + // Loop until the feature flags converge. 
+ if prev == value { + return value; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple_direct() { + let mut value = cache::Initializer::default(); + value.set(Feature::f as u32); + // F (and other extensions with CSRs) -> Zicsr + assert!(imply_features(value).test(Feature::zicsr as u32)); + } + + #[test] + fn simple_indirect() { + let mut value = cache::Initializer::default(); + value.set(Feature::q as u32); + // Q -> D, D -> F, F -> Zicsr + assert!(imply_features(value).test(Feature::zicsr as u32)); + } + + #[test] + fn complex_zcd() { + let mut value = cache::Initializer::default(); + // C & D -> Zcd + value.set(Feature::c as u32); + assert!(!imply_features(value).test(Feature::zcd as u32)); + value.set(Feature::d as u32); + assert!(imply_features(value).test(Feature::zcd as u32)); + } + + #[test] + fn group_simple_forward() { + let mut value = cache::Initializer::default(); + // A -> Zalrsc & Zaamo (forward implication) + value.set(Feature::a as u32); + let value = imply_features(value); + assert!(value.test(Feature::zalrsc as u32)); + assert!(value.test(Feature::zaamo as u32)); + } + + #[test] + fn group_simple_backward() { + let mut value = cache::Initializer::default(); + // Zalrsc & Zaamo -> A (reverse implication) + value.set(Feature::zalrsc as u32); + value.set(Feature::zaamo as u32); + assert!(imply_features(value).test(Feature::a as u32)); + } + + #[test] + fn group_complex_convergence() { + let mut value = cache::Initializer::default(); + // Needs 3 iterations to converge + // (and 4th iteration for convergence checking): + // 1. [Zvksc] -> Zvks & Zvbc + // 2. Zvks -> Zvksed & Zvksh & Zvkb & Zvkt + // 3a. [Zvkned] & [Zvknhb] & [Zvkb] & Zvkt -> {Zvkn} + // 3b. 
Zvkn & Zvbc -> {Zvknc} + value.set(Feature::zvksc as u32); + value.set(Feature::zvkned as u32); + value.set(Feature::zvknhb as u32); + value.set(Feature::zvkb as u32); + let value = imply_features(value); + assert!(value.test(Feature::zvkn as u32)); + assert!(value.test(Feature::zvknc as u32)); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs new file mode 100644 index 00000000000..937f9f26eed --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs @@ -0,0 +1,125 @@ +//! Run-time feature detection for Aarch64 on Windows. + +use crate::detect::{Feature, cache}; + +/// Try to read the features using IsProcessorFeaturePresent. +pub(crate) fn detect_features() -> cache::Initializer { + type DWORD = u32; + type BOOL = i32; + + const FALSE: BOOL = 0; + // The following Microsoft documents isn't updated for aarch64. + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + // These are defined in winnt.h of Windows SDK + const PF_ARM_VFP_32_REGISTERS_AVAILABLE: u32 = 18; + const PF_ARM_NEON_INSTRUCTIONS_AVAILABLE: u32 = 19; + const PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE: u32 = 30; + const PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE: u32 = 31; + const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: u32 = 34; + const PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE: u32 = 43; + const PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE: u32 = 44; + const PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE: u32 = 45; + const PF_ARM_SVE_INSTRUCTIONS_AVAILABLE: u32 = 46; + const PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE: u32 = 47; + const PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE: u32 = 48; + const PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE: u32 = 49; + const PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE: u32 = 50; + const PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE: u32 = 51; + // const PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE: u32 = 52; + // 
const PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE: u32 = 53; + const PF_ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE: u32 = 54; + const PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE: u32 = 55; + const PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE: u32 = 56; + // const PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE: u32 = 57; + // const PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE: u32 = 58; + // const PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE: u32 = 59; + + unsafe extern "system" { + fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL; + } + + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Some features may be supported on current CPU, + // but no way to detect it by OS API. + // Also, we require unsafe block for the extern "system" calls. + unsafe { + enable_feature( + Feature::fp, + IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::asimd, + IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::crc, + IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::lse, + IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::dotprod, + IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::jsconv, + IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::rcpc, + IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve, + IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2, + IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2p1, + IsProcessorFeaturePresent(PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + 
enable_feature( + Feature::sve2_aes, + IsProcessorFeaturePresent(PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE) != FALSE + && IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE) + != FALSE, + ); + enable_feature( + Feature::sve2_bitperm, + IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve_b16b16, + IsProcessorFeaturePresent(PF_ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2_sha3, + IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sve2_sm4, + IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE means aes, sha1, sha2 and + // pmull support + let crypto = + IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE; + enable_feature(Feature::aes, crypto); + enable_feature(Feature::pmull, crypto); + enable_feature(Feature::sha2, crypto); + } + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs new file mode 100644 index 00000000000..8565c2f85e2 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -0,0 +1,335 @@ +//! x86 run-time feature detection is OS independent. + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +use core::mem; + +use crate::detect::{Feature, bit, cache}; + +/// Run-time feature detection on x86 works by using the CPUID instruction. +/// +/// The [CPUID Wikipedia page][wiki_cpuid] contains +/// all the information about which flags to set to query which values, and in +/// which registers these are reported. +/// +/// The definitive references are: +/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +/// Instruction Set Reference, A-Z][intel64_ref]. 
+/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +/// System Instructions][amd64_ref]. +/// +/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID +/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +#[allow(clippy::similar_names)] +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + if cfg!(target_env = "sgx") { + // doesn't support this because it is untrusted data + return value; + } + + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum + // leaf value for subsequent calls of `cpuid` in range [0, + // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars, + // returned in EBX, EDX, and ECX (in that order): + let (max_basic_leaf, vendor_id) = unsafe { + let CpuidResult { + eax: max_basic_leaf, + ebx, + ecx, + edx, + } = __cpuid(0); + let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()]; + let vendor_id: [u8; 12] = mem::transmute(vendor_id); + (max_basic_leaf, vendor_id) + }; + + if max_basic_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; + } + + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { + ecx: proc_info_ecx, + edx: proc_info_edx, + .. + } = unsafe { __cpuid(0x0000_0001_u32) }; + + // EAX = 7: Queries "Extended Features"; + // Contains information about bmi, bmi2, and avx2 support.
+ let ( + extended_features_ebx, + extended_features_ecx, + extended_features_edx, + extended_features_eax_leaf_1, + extended_features_edx_leaf_1, + ) = if max_basic_leaf >= 7 { + let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; + let CpuidResult { + eax: eax_1, + edx: edx_1, + .. + } = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) }; + (ebx, ecx, edx, eax_1, edx_1) + } else { + (0, 0, 0, 0, 0) // CPUID does not support "Extended Features" + }; + + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let CpuidResult { + eax: extended_max_basic_leaf, + .. + } = unsafe { __cpuid(0x8000_0000_u32) }; + + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature + // Bits" + let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + ecx + } else { + 0 + }; + + { + // borrows value till the end of this scope: + let mut enable = |r, rb, f| { + let present = bit::test(r as usize, rb); + if present { + value.set(f as u32); + } + present + }; + + enable(proc_info_ecx, 0, Feature::sse3); + enable(proc_info_ecx, 1, Feature::pclmulqdq); + enable(proc_info_ecx, 9, Feature::ssse3); + enable(proc_info_ecx, 13, Feature::cmpxchg16b); + enable(proc_info_ecx, 19, Feature::sse4_1); + enable(proc_info_ecx, 20, Feature::sse4_2); + enable(proc_info_ecx, 22, Feature::movbe); + enable(proc_info_ecx, 23, Feature::popcnt); + enable(proc_info_ecx, 25, Feature::aes); + let f16c = enable(proc_info_ecx, 29, Feature::f16c); + enable(proc_info_ecx, 30, Feature::rdrand); + enable(extended_features_ebx, 18, Feature::rdseed); + enable(extended_features_ebx, 19, Feature::adx); + enable(extended_features_ebx, 11, Feature::rtm); + enable(proc_info_edx, 4, Feature::tsc); + enable(proc_info_edx, 23, Feature::mmx); + 
enable(proc_info_edx, 24, Feature::fxsr); + enable(proc_info_edx, 25, Feature::sse); + enable(proc_info_edx, 26, Feature::sse2); + enable(extended_features_ebx, 29, Feature::sha); + + enable(extended_features_ecx, 8, Feature::gfni); + enable(extended_features_ecx, 9, Feature::vaes); + enable(extended_features_ecx, 10, Feature::vpclmulqdq); + + enable(extended_features_ebx, 3, Feature::bmi1); + enable(extended_features_ebx, 8, Feature::bmi2); + + enable(extended_features_ebx, 9, Feature::ermsb); + + enable(extended_features_eax_leaf_1, 31, Feature::movrs); + + // Detect if CPUID.19h available + if bit::test(extended_features_ecx as usize, 23) { + let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) }; + enable(ebx, 0, Feature::kl); + enable(ebx, 2, Feature::widekl); + } + + // `XSAVE` and `AVX` support: + let cpu_xsave = bit::test(proc_info_ecx as usize, 26); + if cpu_xsave { + // 0. Here the CPU supports `XSAVE`. + + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - [intel: is avx enabled?][is_avx_enabled], + // - [mozilla: sse.cpp][mozilla_sse_cpp]. + // + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = bit::test(proc_info_ecx as usize, 27); + + if cpu_osxsave { + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the: + // + // * SSE -> `XCR0.SSE[1]` + // * AVX -> `XCR0.AVX[2]` + // * AVX-512 -> `XCR0.AVX-512[7:5]`. + // * AMX -> `XCR0.AMX[18:17]` + // + // by setting the corresponding bits of `XCR0` to `1`. + // + // This is safe because the CPU supports `xsave` + // and the OS has set `osxsave`. 
+ let xcr0 = unsafe { _xgetbv(0) }; + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: + let os_avx_support = xcr0 & 6 == 6; + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`: + let os_avx512_support = xcr0 & 0xe0 == 0xe0; + // Test `XCR0.AMX[18:17]` with the mask `0b110_0000_0000_0000_0000 == 0x60000` + let os_amx_support = xcr0 & 0x60000 == 0x60000; + + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + if os_avx_support { + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software + // Developer’s Manual, Volume 1: Basic Architecture": + // + // "Software enables the XSAVE feature set by setting + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 + // instruction). If this bit is 0, execution of any of XGETBV, + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV + // causes an invalid-opcode exception (#UD)" + // + enable(proc_info_ecx, 26, Feature::xsave); + + // For `xsaveopt`, `xsavec`, and `xsaves` we need to query: + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, + // ECX = 1): + if max_basic_leaf >= 0xd { + let CpuidResult { + eax: proc_extended_state1_eax, + .. 
+ } = unsafe { __cpuid_count(0xd_u32, 1) }; + enable(proc_extended_state1_eax, 0, Feature::xsaveopt); + enable(proc_extended_state1_eax, 1, Feature::xsavec); + enable(proc_extended_state1_eax, 3, Feature::xsaves); + } + + // FMA (uses 256-bit wide registers): + let fma = enable(proc_info_ecx, 12, Feature::fma); + + // And AVX/AVX2: + enable(proc_info_ecx, 28, Feature::avx); + enable(extended_features_ebx, 5, Feature::avx2); + + // "Short" versions of AVX512 instructions + enable(extended_features_eax_leaf_1, 4, Feature::avxvnni); + enable(extended_features_eax_leaf_1, 23, Feature::avxifma); + enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8); + enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert); + enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16); + + enable(extended_features_eax_leaf_1, 0, Feature::sha512); + enable(extended_features_eax_leaf_1, 1, Feature::sm3); + enable(extended_features_eax_leaf_1, 2, Feature::sm4); + + // For AVX-512 the OS also needs to support saving/restoring + // the extended state, only then we enable AVX-512 support: + // Also, Rust makes `avx512f` imply `fma` and `f16c`, because + // otherwise the assembler is broken. But Intel doesn't guarantee + // that `fma` and `f16c` are available with `avx512f`, so we + // need to check for them separately. 
+ if os_avx512_support && f16c && fma { + enable(extended_features_ebx, 16, Feature::avx512f); + enable(extended_features_ebx, 17, Feature::avx512dq); + enable(extended_features_ebx, 21, Feature::avx512ifma); + enable(extended_features_ebx, 26, Feature::avx512pf); + enable(extended_features_ebx, 27, Feature::avx512er); + enable(extended_features_ebx, 28, Feature::avx512cd); + enable(extended_features_ebx, 30, Feature::avx512bw); + enable(extended_features_ebx, 31, Feature::avx512vl); + enable(extended_features_ecx, 1, Feature::avx512vbmi); + enable(extended_features_ecx, 6, Feature::avx512vbmi2); + enable(extended_features_ecx, 11, Feature::avx512vnni); + enable(extended_features_ecx, 12, Feature::avx512bitalg); + enable(extended_features_ecx, 14, Feature::avx512vpopcntdq); + enable(extended_features_edx, 8, Feature::avx512vp2intersect); + enable(extended_features_edx, 23, Feature::avx512fp16); + enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16); + } + } + + if os_amx_support { + enable(extended_features_edx, 24, Feature::amx_tile); + enable(extended_features_edx, 25, Feature::amx_int8); + enable(extended_features_edx, 22, Feature::amx_bf16); + enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); + enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); + + if max_basic_leaf >= 0x1e { + let CpuidResult { + eax: amx_feature_flags_eax, + .. + } = unsafe { __cpuid_count(0x1e_u32, 1) }; + + enable(amx_feature_flags_eax, 4, Feature::amx_fp8); + enable(amx_feature_flags_eax, 5, Feature::amx_transpose); + enable(amx_feature_flags_eax, 6, Feature::amx_tf32); + enable(amx_feature_flags_eax, 7, Feature::amx_avx512); + enable(amx_feature_flags_eax, 8, Feature::amx_movrs); + } + } + } + } + + // This detects ABM on AMD CPUs and LZCNT on Intel CPUs. + // On intel CPUs with popcnt, lzcnt implements the + // "missing part" of ABM, so we map both to the same + // internal feature. 
+ // + // The `is_x86_feature_detected!("lzcnt")` macro then + // internally maps to Feature::abm. + enable(extended_proc_info_ecx, 5, Feature::lzcnt); + + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series + // number(Family 18h). + // + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD + // family 17h. + // + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. + // Related Hygon kernel patch can be found on + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { + // These features are available on AMD arch CPUs: + enable(extended_proc_info_ecx, 6, Feature::sse4a); + enable(extended_proc_info_ecx, 21, Feature::tbm); + enable(extended_proc_info_ecx, 11, Feature::xop); + } + } + + // Unfortunately, some Skylake chips erroneously report support for BMI1 and + // BMI2 without actual support. These chips don't support AVX, and it seems + // that all Intel chips with non-erroneous support BMI do (I didn't check + // other vendors), so we can disable these flags for chips that don't also + // report support for AVX. + // + // It's possible this will pessimize future chips that do support BMI and + // not AVX, but this seems minor compared to a hard crash you get when + // executing an unsupported instruction (to put it another way, it's safe + // for us to under-report CPU features, but not to over-report them). Still, + // to limit any impact this may have in the future, we only do this for + // Intel chips, as it's a bug only present in their chips. 
+ // + // This bug is documented as `SKL052` in the errata section of this document: + // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf + if vendor_id == *b"GenuineIntel" && !value.test(Feature::avx as u32) { + value.unset(Feature::bmi1 as u32); + value.unset(Feature::bmi2 as u32); + } + + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-artificial-aarch64.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-artificial-aarch64.auxv new file mode 100644 index 00000000000..ec826afcf38 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/test_data/linux-artificial-aarch64.auxv Binary files differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv new file mode 100644 index 00000000000..95537b73f20 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv Binary files differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-hwcap2-aarch64.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-hwcap2-aarch64.auxv new file mode 100644 index 00000000000..1d87264b221 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/test_data/linux-hwcap2-aarch64.auxv Binary files differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-no-hwcap2-aarch64.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-no-hwcap2-aarch64.auxv new file mode 100644 index 00000000000..35f01cc767c --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/test_data/linux-no-hwcap2-aarch64.auxv Binary files differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv new file mode 100644 index
00000000000..0538e661f63 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv Binary files differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv new file mode 100644 index 00000000000..75abc02d178 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv Binary files differ diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs new file mode 100644 index 00000000000..ab1b77bad5b --- /dev/null +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection for the Rust standard library. +//! +//! To detect whether a feature is enabled in the system running the binary, +//! use the appropriate macro for the target: +//! +//! * `x86` and `x86_64`: [`is_x86_feature_detected`] +//! * `arm`: [`is_arm_feature_detected`] +//! * `aarch64`: [`is_aarch64_feature_detected`] +//! * `riscv`: [`is_riscv_feature_detected`] +//! * `mips`: [`is_mips_feature_detected`] +//! * `mips64`: [`is_mips64_feature_detected`] +//! * `powerpc`: [`is_powerpc_feature_detected`] +//! * `powerpc64`: [`is_powerpc64_feature_detected`] +//! * `loongarch`: [`is_loongarch_feature_detected`] +//!
* `s390x`: [`is_s390x_feature_detected`] + +#![unstable(feature = "stdarch_internal", issue = "none")] +#![feature(staged_api, doc_cfg, allow_internal_unstable)] +#![deny(rust_2018_idioms)] +#![allow(clippy::shadow_reuse)] +#![cfg_attr(test, allow(unused_imports))] +#![no_std] +#![allow(internal_features)] + +#[cfg(test)] +#[macro_use] +extern crate std; + +// rust-lang/rust#83888: removing `extern crate` gives an error that `vec_spare_capacity` is unknown +#[cfg_attr(feature = "std_detect_file_io", allow(unused_extern_crates))] +#[cfg(feature = "std_detect_file_io")] +extern crate alloc; + +#[doc(hidden)] +#[unstable(feature = "stdarch_internal", issue = "none")] +pub mod detect; |
