diff options
| author | Jakub Beránek <berykubik@gmail.com> | 2025-07-04 09:26:12 +0200 |
|---|---|---|
| committer | Jakub Beránek <berykubik@gmail.com> | 2025-07-22 20:17:06 +0200 |
| commit | 5b2de8ab27e4a319e23817b61830a5cc6fd1745e (patch) | |
| tree | 94d6ce5680690c678e56d907ec27145fb464eeed /library/std_detect/src/detect/os/linux | |
| parent | 2e5367566819ca7878baa9600ae7a93eb0e37bbf (diff) | |
| download | rust-5b2de8ab27e4a319e23817b61830a5cc6fd1745e.tar.gz rust-5b2de8ab27e4a319e23817b61830a5cc6fd1745e.zip | |
Move `std_detect` from `library/stdarch` to `library`
Diffstat (limited to 'library/std_detect/src/detect/os/linux')
| -rw-r--r-- | library/std_detect/src/detect/os/linux/aarch64.rs | 484 | ||||
| -rw-r--r-- | library/std_detect/src/detect/os/linux/arm.rs | 34 | ||||
| -rw-r--r-- | library/std_detect/src/detect/os/linux/auxvec.rs | 341 | ||||
| -rw-r--r-- | library/std_detect/src/detect/os/linux/loongarch.rs | 68 | ||||
| -rw-r--r-- | library/std_detect/src/detect/os/linux/mips.rs | 23 | ||||
| -rw-r--r-- | library/std_detect/src/detect/os/linux/mod.rs | 67 | ||||
| -rw-r--r-- | library/std_detect/src/detect/os/linux/powerpc.rs | 35 | ||||
| -rw-r--r-- | library/std_detect/src/detect/os/linux/riscv.rs | 330 | ||||
| -rw-r--r-- | library/std_detect/src/detect/os/linux/s390x.rs | 152 |
9 files changed, 1534 insertions, 0 deletions
diff --git a/library/std_detect/src/detect/os/linux/aarch64.rs b/library/std_detect/src/detect/os/linux/aarch64.rs new file mode 100644 index 00000000000..22a9cefff7b --- /dev/null +++ b/library/std_detect/src/detect/os/linux/aarch64.rs @@ -0,0 +1,484 @@ +//! Run-time feature detection for Aarch64 on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + #[cfg(target_os = "android")] + let is_exynos9810 = { + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // https://reviews.llvm.org/D114523 + let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize]; + let len = unsafe { + libc::__system_property_get(c"ro.arch".as_ptr(), arch.as_mut_ptr() as *mut libc::c_char) + }; + // On Exynos, ro.arch is not available on Android 12+, but it is fine + // because Android 9+ includes the fix. + len > 0 && arch.starts_with(b"exynos9810") + }; + #[cfg(not(target_os = "android"))] + let is_exynos9810 = false; + + if let Ok(auxv) = auxvec::auxv() { + let hwcap: AtHwcap = auxv.into(); + return hwcap.cache(is_exynos9810); + } + cache::Initializer::default() +} + +/// These values are part of the platform-specific [asm/hwcap.h][hwcap] . +/// +/// The names match those used for cpuinfo. +/// +/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +#[derive(Debug, Default, PartialEq)] +struct AtHwcap { + // AT_HWCAP + fp: bool, + asimd: bool, + // evtstrm: No LLVM support. + aes: bool, + pmull: bool, + sha1: bool, + sha2: bool, + crc32: bool, + atomics: bool, + fphp: bool, + asimdhp: bool, + // cpuid: No LLVM support. + asimdrdm: bool, + jscvt: bool, + fcma: bool, + lrcpc: bool, + dcpop: bool, + sha3: bool, + sm3: bool, + sm4: bool, + asimddp: bool, + sha512: bool, + sve: bool, + fhm: bool, + dit: bool, + uscat: bool, + ilrcpc: bool, + flagm: bool, + ssbs: bool, + sb: bool, + paca: bool, + pacg: bool, + + // AT_HWCAP2 + dcpodp: bool, + sve2: bool, + sveaes: bool, + svepmull: bool, + svebitperm: bool, + svesha3: bool, + svesm4: bool, + flagm2: bool, + frint: bool, + // svei8mm: See i8mm feature. + svef32mm: bool, + svef64mm: bool, + // svebf16: See bf16 feature. + i8mm: bool, + bf16: bool, + // dgh: No LLVM support. + rng: bool, + bti: bool, + mte: bool, + ecv: bool, + // afp: bool, + // rpres: bool, + // mte3: bool, + sme: bool, + smei16i64: bool, + smef64f64: bool, + // smei8i32: bool, + // smef16f32: bool, + // smeb16f32: bool, + // smef32f32: bool, + smefa64: bool, + wfxt: bool, + // ebf16: bool, + // sveebf16: bool, + cssc: bool, + // rprfm: bool, + sve2p1: bool, + sme2: bool, + sme2p1: bool, + // smei16i32: bool, + // smebi32i32: bool, + smeb16b16: bool, + smef16f16: bool, + mops: bool, + hbc: bool, + sveb16b16: bool, + lrcpc3: bool, + lse128: bool, + fpmr: bool, + lut: bool, + faminmax: bool, + f8cvt: bool, + f8fma: bool, + f8dp4: bool, + f8dp2: bool, + f8e4m3: bool, + f8e5m2: bool, + smelutv2: bool, + smef8f16: bool, + smef8f32: bool, + smesf8fma: bool, + smesf8dp4: bool, + smesf8dp2: bool, + // pauthlr: bool, +} + +impl From<auxvec::AuxVec> for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. + fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + fp: bit::test(auxv.hwcap, 0), + asimd: bit::test(auxv.hwcap, 1), + // evtstrm: bit::test(auxv.hwcap, 2), + aes: bit::test(auxv.hwcap, 3), + pmull: bit::test(auxv.hwcap, 4), + sha1: bit::test(auxv.hwcap, 5), + sha2: bit::test(auxv.hwcap, 6), + crc32: bit::test(auxv.hwcap, 7), + atomics: bit::test(auxv.hwcap, 8), + fphp: bit::test(auxv.hwcap, 9), + asimdhp: bit::test(auxv.hwcap, 10), + // cpuid: bit::test(auxv.hwcap, 11), + asimdrdm: bit::test(auxv.hwcap, 12), + jscvt: bit::test(auxv.hwcap, 13), + fcma: bit::test(auxv.hwcap, 14), + lrcpc: bit::test(auxv.hwcap, 15), + dcpop: bit::test(auxv.hwcap, 16), + sha3: bit::test(auxv.hwcap, 17), + sm3: bit::test(auxv.hwcap, 18), + sm4: bit::test(auxv.hwcap, 19), + asimddp: bit::test(auxv.hwcap, 20), + sha512: bit::test(auxv.hwcap, 21), + sve: bit::test(auxv.hwcap, 22), + fhm: bit::test(auxv.hwcap, 23), + dit: bit::test(auxv.hwcap, 24), + uscat: bit::test(auxv.hwcap, 25), + ilrcpc: bit::test(auxv.hwcap, 26), + flagm: bit::test(auxv.hwcap, 27), + ssbs: bit::test(auxv.hwcap, 28), + sb: bit::test(auxv.hwcap, 29), + paca: bit::test(auxv.hwcap, 30), + pacg: bit::test(auxv.hwcap, 31), + + // AT_HWCAP2 + dcpodp: bit::test(auxv.hwcap2, 0), + sve2: bit::test(auxv.hwcap2, 1), + sveaes: bit::test(auxv.hwcap2, 2), + svepmull: bit::test(auxv.hwcap2, 3), + svebitperm: bit::test(auxv.hwcap2, 4), + svesha3: bit::test(auxv.hwcap2, 5), + svesm4: bit::test(auxv.hwcap2, 6), + flagm2: bit::test(auxv.hwcap2, 7), + frint: bit::test(auxv.hwcap2, 8), + // svei8mm: bit::test(auxv.hwcap2, 9), + svef32mm: bit::test(auxv.hwcap2, 10), + svef64mm: bit::test(auxv.hwcap2, 11), + // svebf16: bit::test(auxv.hwcap2, 12), + i8mm: bit::test(auxv.hwcap2, 13), + bf16: bit::test(auxv.hwcap2, 14), + // dgh: bit::test(auxv.hwcap2, 15), + rng: bit::test(auxv.hwcap2, 16), + bti: bit::test(auxv.hwcap2, 17), + mte: bit::test(auxv.hwcap2, 18), + ecv: bit::test(auxv.hwcap2, 19), + // afp: bit::test(auxv.hwcap2, 20), + // rpres: bit::test(auxv.hwcap2, 21), + // mte3: bit::test(auxv.hwcap2, 22), + sme: bit::test(auxv.hwcap2, 23), + smei16i64: bit::test(auxv.hwcap2, 24), + smef64f64: bit::test(auxv.hwcap2, 25), + // smei8i32: bit::test(auxv.hwcap2, 26), + // smef16f32: bit::test(auxv.hwcap2, 27), + // smeb16f32: bit::test(auxv.hwcap2, 28), + // smef32f32: bit::test(auxv.hwcap2, 29), + smefa64: bit::test(auxv.hwcap2, 30), + wfxt: bit::test(auxv.hwcap2, 31), + // ebf16: bit::test(auxv.hwcap2, 32), + // sveebf16: bit::test(auxv.hwcap2, 33), + cssc: bit::test(auxv.hwcap2, 34), + // rprfm: bit::test(auxv.hwcap2, 35), + sve2p1: bit::test(auxv.hwcap2, 36), + sme2: bit::test(auxv.hwcap2, 37), + sme2p1: bit::test(auxv.hwcap2, 38), + // smei16i32: bit::test(auxv.hwcap2, 39), + // smebi32i32: bit::test(auxv.hwcap2, 40), + smeb16b16: bit::test(auxv.hwcap2, 41), + smef16f16: bit::test(auxv.hwcap2, 42), + mops: bit::test(auxv.hwcap2, 43), + hbc: bit::test(auxv.hwcap2, 44), + sveb16b16: bit::test(auxv.hwcap2, 45), + lrcpc3: bit::test(auxv.hwcap2, 46), + lse128: bit::test(auxv.hwcap2, 47), + fpmr: bit::test(auxv.hwcap2, 48), + lut: bit::test(auxv.hwcap2, 49), + faminmax: bit::test(auxv.hwcap2, 50), + f8cvt: bit::test(auxv.hwcap2, 51), + f8fma: bit::test(auxv.hwcap2, 52), + f8dp4: bit::test(auxv.hwcap2, 53), + f8dp2: bit::test(auxv.hwcap2, 54), + f8e4m3: bit::test(auxv.hwcap2, 55), + f8e5m2: bit::test(auxv.hwcap2, 56), + smelutv2: bit::test(auxv.hwcap2, 57), + smef8f16: bit::test(auxv.hwcap2, 58), + smef8f32: bit::test(auxv.hwcap2, 59), + smesf8fma: bit::test(auxv.hwcap2, 60), + smesf8dp4: bit::test(auxv.hwcap2, 61), + smesf8dp2: bit::test(auxv.hwcap2, 62), + // pauthlr: bit::test(auxv.hwcap2, ??), + } + } +} + +impl AtHwcap { + /// Initializes the cache from the feature -bits. + /// + /// The feature dependencies here come directly from LLVM's feature definitions: + /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td + fn cache(self, is_exynos9810: bool) -> cache::Initializer { + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // So, only check features that are known to be available on exynos-m3: + // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature + // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342. + if is_exynos9810 { + enable_feature(Feature::fp, self.fp); + enable_feature(Feature::crc, self.crc32); + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // Cryptographic extensions require ASIMD + // AES also covers FEAT_PMULL + enable_feature(Feature::aes, self.aes && self.pmull && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + return value; + } + + enable_feature(Feature::fp, self.fp); + // Half-float support requires float support + enable_feature(Feature::fp16, self.fp && self.fphp); + // FHM (fp16fml in LLVM) requires half float support + enable_feature(Feature::fhm, self.fphp && self.fhm); + enable_feature(Feature::pmull, self.pmull); + enable_feature(Feature::crc, self.crc32); + enable_feature(Feature::lse, self.atomics); + enable_feature(Feature::lse2, self.uscat); + enable_feature(Feature::lse128, self.lse128 && self.atomics); + enable_feature(Feature::rcpc, self.lrcpc); + // RCPC2 (rcpc-immo in LLVM) requires RCPC support + let rcpc2 = self.ilrcpc && self.lrcpc; + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2); + enable_feature(Feature::dit, self.dit); + enable_feature(Feature::flagm, self.flagm); + enable_feature(Feature::flagm2, self.flagm2); + enable_feature(Feature::ssbs, self.ssbs); + enable_feature(Feature::sb, self.sb); + enable_feature(Feature::paca, self.paca); + enable_feature(Feature::pacg, self.pacg); + // enable_feature(Feature::pauth_lr, self.pauthlr); + enable_feature(Feature::dpb, self.dcpop); + enable_feature(Feature::dpb2, self.dcpodp); + enable_feature(Feature::rand, self.rng); + enable_feature(Feature::bti, self.bti); + enable_feature(Feature::mte, self.mte); + // jsconv requires float support + enable_feature(Feature::jsconv, self.jscvt && self.fp); + enable_feature(Feature::rdm, self.asimdrdm); + enable_feature(Feature::dotprod, self.asimddp); + enable_feature(Feature::frintts, self.frint); + + // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes + // separately. We ignore that distinction here. + enable_feature(Feature::i8mm, self.i8mm); + enable_feature(Feature::bf16, self.bf16); + + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // ASIMD extensions require ASIMD support: + enable_feature(Feature::fcma, self.fcma && asimd); + enable_feature(Feature::sve, self.sve && asimd); + + // SVE extensions require SVE & ASIMD + enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd); + enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd); + + // Cryptographic extensions require ASIMD + enable_feature(Feature::aes, self.aes && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + // SHA512/SHA3 require SHA1 & SHA256 + enable_feature( + Feature::sha3, + self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd, + ); + enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd); + + // SVE2 requires SVE + let sve2 = self.sve2 && self.sve && asimd; + enable_feature(Feature::sve2, sve2); + enable_feature(Feature::sve2p1, self.sve2p1 && sve2); + // SVE2 extensions require SVE2 and crypto features + enable_feature( + Feature::sve2_aes, + self.sveaes && self.svepmull && sve2 && self.aes, + ); + enable_feature( + Feature::sve2_sm4, + self.svesm4 && sve2 && self.sm3 && self.sm4, + ); + enable_feature( + Feature::sve2_sha3, + self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2, + ); + enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2); + enable_feature(Feature::sve_b16b16, self.bf16 && self.sveb16b16); + enable_feature(Feature::hbc, self.hbc); + enable_feature(Feature::mops, self.mops); + enable_feature(Feature::ecv, self.ecv); + enable_feature(Feature::lut, self.lut); + enable_feature(Feature::cssc, self.cssc); + enable_feature(Feature::fpmr, self.fpmr); + enable_feature(Feature::faminmax, self.faminmax); + let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16; + enable_feature(Feature::fp8, fp8); + let fp8fma = self.f8fma && fp8; + enable_feature(Feature::fp8fma, fp8fma); + let fp8dot4 = self.f8dp4 && fp8fma; + enable_feature(Feature::fp8dot4, fp8dot4); + enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4); + enable_feature(Feature::wfxt, self.wfxt); + let sme = self.sme && self.bf16; + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme_i16i64, self.smei16i64 && sme); + enable_feature(Feature::sme_f64f64, self.smef64f64 && sme); + enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2); + let sme2 = self.sme2 && sme; + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme2p1, self.sme2p1 && sme2); + enable_feature( + Feature::sme_b16b16, + sme2 && self.bf16 && self.sveb16b16 && self.smeb16b16, + ); + enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2); + enable_feature(Feature::sme_lutv2, self.smelutv2); + let sme_f8f32 = self.smef8f32 && sme2 && fp8; + enable_feature(Feature::sme_f8f32, sme_f8f32); + enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32); + let ssve_fp8fma = self.smesf8fma && sme2 && fp8; + enable_feature(Feature::ssve_fp8fma, ssve_fp8fma); + let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma; + enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4); + enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4); + } + value + } +} + +#[cfg(target_endian = "little")] +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(feature = "std_detect_file_io")] + mod auxv_from_file { + use super::auxvec::auxv_from_file; + use super::*; + // The baseline hwcaps used in the (artificial) auxv test files. + fn baseline_hwcaps() -> AtHwcap { + AtHwcap { + fp: true, + asimd: true, + aes: true, + pmull: true, + sha1: true, + sha2: true, + crc32: true, + atomics: true, + fphp: true, + asimdhp: true, + asimdrdm: true, + lrcpc: true, + dcpop: true, + asimddp: true, + ssbs: true, + ..AtHwcap::default() + } + } + + #[test] + fn linux_empty_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_no_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!( + AtHwcap::from(v), + AtHwcap { + // Some other HWCAP bits. + paca: true, + pacg: true, + // HWCAP2-only bits. + dcpodp: true, + frint: true, + rng: true, + bti: true, + mte: true, + ..baseline_hwcaps() + } + ); + } + } +} diff --git a/library/std_detect/src/detect/os/linux/arm.rs b/library/std_detect/src/detect/os/linux/arm.rs new file mode 100644 index 00000000000..bbb173227d0 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/arm.rs @@ -0,0 +1,34 @@ +//! Run-time feature detection for ARM on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::i8mm, bit::test(auxv.hwcap, 27)); + enable_feature(&mut value, Feature::dotprod, bit::test(auxv.hwcap, 24)); + enable_feature(&mut value, Feature::neon, bit::test(auxv.hwcap, 12)); + enable_feature(&mut value, Feature::pmull, bit::test(auxv.hwcap2, 1)); + enable_feature(&mut value, Feature::crc, bit::test(auxv.hwcap2, 4)); + enable_feature(&mut value, Feature::aes, bit::test(auxv.hwcap2, 0)); + // SHA2 requires SHA1 & SHA2 features + enable_feature( + &mut value, + Feature::sha2, + bit::test(auxv.hwcap2, 2) && bit::test(auxv.hwcap2, 3), + ); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/auxvec.rs b/library/std_detect/src/detect/os/linux/auxvec.rs new file mode 100644 index 00000000000..54f52aa7533 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/auxvec.rs @@ -0,0 +1,341 @@ +//! Parses ELF auxiliary vectors. +#![allow(dead_code)] + +pub(crate) const AT_NULL: usize = 0; + +/// Key to access the CPU Hardware capabilities bitfield. +pub(crate) const AT_HWCAP: usize = 16; +/// Key to access the CPU Hardware capabilities 2 bitfield. +#[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", +))] +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub(crate) struct AuxVec { + pub hwcap: usize, + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For Linux, they are +/// defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// There is no perfect way of reading the auxiliary vector. +/// +/// - If the `std_detect_dlsym_getauxval` cargo feature is enabled, this will use +/// `getauxval` if its linked to the binary, and otherwise proceed to a fallback implementation. +/// When `std_detect_dlsym_getauxval` is disabled, this will assume that `getauxval` is +/// linked to the binary - if that is not the case the behavior is undefined. +/// - Otherwise, if the `std_detect_file_io` cargo feature is enabled, it will +/// try to read `/proc/self/auxv`. +/// - If that fails, this function returns an error. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// Note: The `std_detect_dlsym_getauxval` cargo feature is ignored on +/// `*-linux-{gnu,musl,ohos}*` and `*-android*` targets because we can safely assume `getauxval` +/// is linked to the binary. +/// - `*-linux-gnu*` targets ([since Rust 1.64](https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html)) +/// have glibc requirements higher than [glibc 2.16 that added `getauxval`](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html). +/// - `*-linux-musl*` targets ([at least since Rust 1.15](https://github.com/rust-lang/rust/blob/1.15.0/src/ci/docker/x86_64-musl/build-musl.sh#L15)) +/// use musl newer than [musl 1.1.0 that added `getauxval`](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197) +/// - `*-linux-ohos*` targets use a [fork of musl 1.2](https://gitee.com/openharmony/docs/blob/master/en/application-dev/reference/native-lib/musl.md) +/// - `*-android*` targets ([since Rust 1.68](https://blog.rust-lang.org/2023/01/09/android-ndk-update-r25.html)) +/// have the minimum supported API level higher than [Android 4.3 (API level 18) that added `getauxval`](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49). +/// +/// For more information about when `getauxval` is available check the great +/// [`auxv` crate documentation][auxv_docs]. +/// +/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h +/// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/ +pub(crate) fn auxv() -> Result<AuxVec, ()> { + // Try to call a getauxval function. + if let Ok(hwcap) = getauxval(AT_HWCAP) { + // Targets with only AT_HWCAP: + #[cfg(any( + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "mips", + target_arch = "mips64", + target_arch = "loongarch32", + target_arch = "loongarch64", + ))] + { + // Zero could indicate that no features were detected, but it's also used to indicate + // an error. In either case, try the fallback. + if hwcap != 0 { + return Ok(AuxVec { hwcap }); + } + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + // Zero could indicate that no features were detected, but it's also used to indicate + // an error. In particular, on many platforms AT_HWCAP2 will be legitimately zero, + // since it contains the most recent feature flags. Use the fallback only if no + // features were detected at all. + if hwcap != 0 || hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + + // Intentionnaly not used + let _ = hwcap; + } + + #[cfg(feature = "std_detect_file_io")] + { + // If calling getauxval fails, try to read the auxiliary vector from + // its file: + auxv_from_file("/proc/self/auxv") + } + #[cfg(not(feature = "std_detect_file_io"))] + { + Err(()) + } +} + +/// Tries to read the `key` from the auxiliary vector by calling the +/// `getauxval` function. If the function is not linked, this function return `Err`. +fn getauxval(key: usize) -> Result<usize, ()> { + type F = unsafe extern "C" fn(libc::c_ulong) -> libc::c_ulong; + cfg_if::cfg_if! { + if #[cfg(all( + feature = "std_detect_dlsym_getauxval", + not(all( + target_os = "linux", + any(target_env = "gnu", target_env = "musl", target_env = "ohos"), + )), + not(target_os = "android"), + ))] { + let ffi_getauxval: F = unsafe { + let ptr = libc::dlsym(libc::RTLD_DEFAULT, c"getauxval".as_ptr()); + if ptr.is_null() { + return Err(()); + } + core::mem::transmute(ptr) + }; + } else { + let ffi_getauxval: F = libc::getauxval; + } + } + Ok(unsafe { ffi_getauxval(key as libc::c_ulong) as usize }) +} + +/// Tries to read the auxiliary vector from the `file`. If this fails, this +/// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] +pub(super) fn auxv_from_file(file: &str) -> Result<AuxVec, ()> { + let file = super::read_file(file)?; + + // See <https://github.com/torvalds/linux/blob/v5.15/include/uapi/linux/auxvec.h>. + // + // The auxiliary vector contains at most 34 (key,value) fields: from + // `AT_MINSIGSTKSZ` to `AT_NULL`, but its number may increase. + let len = file.len(); + let mut buf = alloc::vec![0_usize; 1 + len / core::mem::size_of::<usize>()]; + unsafe { + core::ptr::copy_nonoverlapping(file.as_ptr(), buf.as_mut_ptr() as *mut u8, len); + } + + auxv_from_buf(&buf) +} + +/// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this +/// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] +fn auxv_from_buf(buf: &[usize]) -> Result<AuxVec, ()> { + // Targets with only AT_HWCAP: + #[cfg(any( + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "mips", + target_arch = "mips64", + target_arch = "loongarch32", + target_arch = "loongarch64", + ))] + { + for el in buf.chunks(2) { + match el[0] { + AT_NULL => break, + AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }), + _ => (), + } + } + } + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + let mut hwcap = None; + // For some platforms, AT_HWCAP2 was added recently, so let it default to zero. + let mut hwcap2 = 0; + for el in buf.chunks(2) { + match el[0] { + AT_NULL => break, + AT_HWCAP => hwcap = Some(el[1]), + AT_HWCAP2 => hwcap2 = el[1], + _ => (), + } + } + + if let Some(hwcap) = hwcap { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + // Suppress unused variable + let _ = buf; + Err(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + // FIXME: on mips/mips64 getauxval returns 0, and /proc/self/auxv + // does not always contain the AT_HWCAP key under qemu. + #[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + #[test] + fn auxv_crate() { + let v = auxv(); + if let Ok(hwcap) = getauxval(AT_HWCAP) { + let rt_hwcap = v.expect("failed to find hwcap key").hwcap; + assert_eq!(rt_hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + let rt_hwcap2 = v.expect("failed to find hwcap2 key").hwcap2; + assert_eq!(rt_hwcap2, hwcap2); + } + } + } + + #[test] + fn auxv_dump() { + if let Ok(auxvec) = auxv() { + println!("{:?}", auxvec); + } else { + println!("both getauxval() and reading /proc/self/auxv failed!"); + } + } + + #[cfg(feature = "std_detect_file_io")] + cfg_if::cfg_if! { + if #[cfg(target_arch = "arm")] { + #[test] + fn linux_rpi3() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-rpi3.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 4174038); + assert_eq!(v.hwcap2, 16); + } + + #[test] + fn linux_macos_vb() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv"); + println!("file: {file}"); + // The file contains HWCAP but not HWCAP2. In that case, we treat HWCAP2 as zero. + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 126614527); + assert_eq!(v.hwcap2, 0); + } + } else if #[cfg(target_arch = "aarch64")] { + #[cfg(target_endian = "little")] + #[test] + fn linux_artificial_aarch64() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-artificial-aarch64.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 0x0123456789abcdef); + assert_eq!(v.hwcap2, 0x02468ace13579bdf); + } + #[cfg(target_endian = "little")] + #[test] + fn linux_no_hwcap2_aarch64() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv"); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + // An absent HWCAP2 is treated as zero, and does not prevent acceptance of HWCAP. + assert_ne!(v.hwcap, 0); + assert_eq!(v.hwcap2, 0); + } + } + } + + #[test] + #[cfg(feature = "std_detect_file_io")] + fn auxv_dump_procfs() { + if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") { + println!("{:?}", auxvec); + } else { + println!("reading /proc/self/auxv failed!"); + } + } + + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", + ))] + #[test] + #[cfg(feature = "std_detect_file_io")] + fn auxv_crate_procfs() { + if let Ok(procfs_auxv) = auxv_from_file("/proc/self/auxv") { + assert_eq!(auxv().unwrap(), procfs_auxv); + } + } +} diff --git a/library/std_detect/src/detect/os/linux/loongarch.rs b/library/std_detect/src/detect/os/linux/loongarch.rs new file mode 100644 index 00000000000..14cc7a73183 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/loongarch.rs @@ -0,0 +1,68 @@ +//! Run-time feature detection for LoongArch on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; +use core::arch::asm; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, feature, enable| { + if enable { + value.set(feature as u32); + } + }; + + // The values are part of the platform-specific [cpucfg] + // + // [cpucfg]: LoongArch Reference Manual Volume 1: Basic Architecture v1.1 + let cpucfg2: usize; + unsafe { + asm!( + "cpucfg {}, {}", + out(reg) cpucfg2, in(reg) 2, + options(pure, nomem, preserves_flags, nostack) + ); + } + let cpucfg3: usize; + unsafe { + asm!( + "cpucfg {}, {}", + out(reg) cpucfg3, in(reg) 3, + options(pure, nomem, preserves_flags, nostack) + ); + } + enable_feature(&mut value, Feature::frecipe, bit::test(cpucfg2, 25)); + enable_feature(&mut value, Feature::div32, bit::test(cpucfg2, 26)); + enable_feature(&mut value, Feature::lam_bh, bit::test(cpucfg2, 27)); + enable_feature(&mut value, Feature::lamcas, bit::test(cpucfg2, 28)); + enable_feature(&mut value, Feature::scq, bit::test(cpucfg2, 30)); + enable_feature(&mut value, Feature::ld_seq_sa, bit::test(cpucfg3, 23)); + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/loongarch/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature( + &mut value, + Feature::f, + bit::test(cpucfg2, 1) && bit::test(auxv.hwcap, 3), + ); + enable_feature( + &mut value, + Feature::d, + bit::test(cpucfg2, 2) && bit::test(auxv.hwcap, 3), + ); + enable_feature(&mut value, Feature::lsx, bit::test(auxv.hwcap, 4)); + enable_feature(&mut value, Feature::lasx, bit::test(auxv.hwcap, 5)); + enable_feature( + &mut value, + Feature::lbt, + bit::test(auxv.hwcap, 10) && bit::test(auxv.hwcap, 11) && bit::test(auxv.hwcap, 12), + ); + enable_feature(&mut value, Feature::lvz, bit::test(auxv.hwcap, 9)); + enable_feature(&mut value, Feature::ual, bit::test(auxv.hwcap, 2)); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/mips.rs b/library/std_detect/src/detect/os/linux/mips.rs new file mode 100644 index 00000000000..0cfa8869887 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/mips.rs @@ -0,0 +1,23 @@ +//! Run-time feature detection for MIPS on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/mips/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1)); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/mod.rs b/library/std_detect/src/detect/os/linux/mod.rs new file mode 100644 index 00000000000..9accd41717b --- /dev/null +++ b/library/std_detect/src/detect/os/linux/mod.rs @@ -0,0 +1,67 @@ +//! Run-time feature detection on Linux +//! +#[cfg(feature = "std_detect_file_io")] +use alloc::vec::Vec; + +mod auxvec; + +#[cfg(feature = "std_detect_file_io")] +fn read_file(path: &str) -> Result<Vec<u8>, ()> { + let mut path = Vec::from(path.as_bytes()); + path.push(0); + + unsafe { + let file = libc::open(path.as_ptr() as *const libc::c_char, libc::O_RDONLY); + if file == -1 { + return Err(()); + } + + let mut data = Vec::new(); + loop { + data.reserve(4096); + let spare = data.spare_capacity_mut(); + match libc::read(file, spare.as_mut_ptr() as *mut _, spare.len()) { + -1 => { + libc::close(file); + return Err(()); + } + 0 => break, + n => data.set_len(data.len() + n as usize), + } + } + + libc::close(file); + Ok(data) + } +} + +cfg_if::cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub(crate) use self::aarch64::detect_features; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub(crate) use self::arm::detect_features; + } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] { + mod riscv; + pub(crate) use self::riscv::detect_features; + } else if #[cfg(any(target_arch = "mips", target_arch = "mips64"))] { + mod mips; + pub(crate) use self::mips::detect_features; + } else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] { + mod powerpc; + pub(crate) use self::powerpc::detect_features; + } else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] { + mod loongarch; + pub(crate) use self::loongarch::detect_features; + } else if #[cfg(target_arch = "s390x")] { + mod s390x; + pub(crate) use self::s390x::detect_features; + } else { + use crate::detect::cache; + /// Performs run-time feature detection. + pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() + } + } +} diff --git a/library/std_detect/src/detect/os/linux/powerpc.rs b/library/std_detect/src/detect/os/linux/powerpc.rs new file mode 100644 index 00000000000..6a4f7e715d9 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/powerpc.rs @@ -0,0 +1,35 @@ +//! Run-time feature detection for PowerPC on Linux. + +use super::auxvec; +use crate::detect::{Feature, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/cputable.h][cputable] + // + // [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h + if let Ok(auxv) = auxvec::auxv() { + // note: the PowerPC values are the mask to do the test (instead of the + // index of the bit to test like in ARM and Aarch64) + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + let power8_features = auxv.hwcap2 & 0x80000000 != 0; + enable_feature(&mut value, Feature::power8, power8_features); + enable_feature(&mut value, Feature::power8_altivec, power8_features); + enable_feature(&mut value, Feature::power8_crypto, power8_features); + enable_feature(&mut value, Feature::power8_vector, power8_features); + let power9_features = auxv.hwcap2 & 0x00800000 != 0; + enable_feature(&mut value, Feature::power9, power9_features); + enable_feature(&mut value, Feature::power9_altivec, power9_features); + enable_feature(&mut value, Feature::power9_vector, power9_features); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/riscv.rs b/library/std_detect/src/detect/os/linux/riscv.rs new file mode 100644 index 00000000000..db20538af95 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/riscv.rs @@ -0,0 +1,330 @@ +//! Run-time feature detection for RISC-V on Linux. +//! +//! On RISC-V, detection using auxv only supports single-letter extensions. +//! So, we use riscv_hwprobe that supports multi-letter extensions if available. +//! <https://www.kernel.org/doc/html/latest/arch/riscv/hwprobe.html> + +use core::ptr; + +use super::super::riscv::imply_features; +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +// See <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/prctl.h?h=v6.15> +// for runtime status query constants. +const PR_RISCV_V_GET_CONTROL: libc::c_int = 70; +const PR_RISCV_V_VSTATE_CTRL_ON: libc::c_int = 2; +const PR_RISCV_V_VSTATE_CTRL_CUR_MASK: libc::c_int = 3; + +// See <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/uapi/asm/hwprobe.h?h=v6.15> +// for riscv_hwprobe struct and hardware probing constants. + +#[repr(C)] +struct riscv_hwprobe { + key: i64, + value: u64, +} + +impl riscv_hwprobe { + // key is overwritten to -1 if not supported by riscv_hwprobe syscall. + pub fn get(&self) -> Option<u64> { + (self.key != -1).then_some(self.value) + } +} + +#[allow(non_upper_case_globals)] +const __NR_riscv_hwprobe: libc::c_long = 258; + +const RISCV_HWPROBE_KEY_BASE_BEHAVIOR: i64 = 3; +const RISCV_HWPROBE_BASE_BEHAVIOR_IMA: u64 = 1 << 0; + +const RISCV_HWPROBE_KEY_IMA_EXT_0: i64 = 4; +const RISCV_HWPROBE_IMA_FD: u64 = 1 << 0; +const RISCV_HWPROBE_IMA_C: u64 = 1 << 1; +const RISCV_HWPROBE_IMA_V: u64 = 1 << 2; +const RISCV_HWPROBE_EXT_ZBA: u64 = 1 << 3; +const RISCV_HWPROBE_EXT_ZBB: u64 = 1 << 4; +const RISCV_HWPROBE_EXT_ZBS: u64 = 1 << 5; +const RISCV_HWPROBE_EXT_ZICBOZ: u64 = 1 << 6; +const RISCV_HWPROBE_EXT_ZBC: u64 = 1 << 7; +const RISCV_HWPROBE_EXT_ZBKB: u64 = 1 << 8; +const RISCV_HWPROBE_EXT_ZBKC: u64 = 1 << 9; +const RISCV_HWPROBE_EXT_ZBKX: u64 = 1 << 10; +const RISCV_HWPROBE_EXT_ZKND: u64 = 1 << 11; +const RISCV_HWPROBE_EXT_ZKNE: u64 = 1 << 12; +const RISCV_HWPROBE_EXT_ZKNH: u64 = 1 << 13; +const RISCV_HWPROBE_EXT_ZKSED: u64 = 1 << 14; +const RISCV_HWPROBE_EXT_ZKSH: u64 = 1 << 15; +const RISCV_HWPROBE_EXT_ZKT: u64 = 1 << 16; +const RISCV_HWPROBE_EXT_ZVBB: u64 = 1 << 17; +const RISCV_HWPROBE_EXT_ZVBC: u64 = 1 << 18; +const RISCV_HWPROBE_EXT_ZVKB: u64 = 1 << 19; +const RISCV_HWPROBE_EXT_ZVKG: u64 = 1 << 20; +const RISCV_HWPROBE_EXT_ZVKNED: u64 = 1 << 21; +const RISCV_HWPROBE_EXT_ZVKNHA: u64 = 1 << 22; +const RISCV_HWPROBE_EXT_ZVKNHB: u64 = 1 << 23; +const RISCV_HWPROBE_EXT_ZVKSED: u64 = 1 << 24; +const RISCV_HWPROBE_EXT_ZVKSH: u64 = 1 << 25; +const RISCV_HWPROBE_EXT_ZVKT: u64 = 1 << 26; +const RISCV_HWPROBE_EXT_ZFH: u64 = 1 << 27; +const RISCV_HWPROBE_EXT_ZFHMIN: u64 = 1 << 28; +const RISCV_HWPROBE_EXT_ZIHINTNTL: u64 = 1 << 29; +const RISCV_HWPROBE_EXT_ZVFH: u64 = 1 << 30; +const RISCV_HWPROBE_EXT_ZVFHMIN: u64 = 1 << 31; +const RISCV_HWPROBE_EXT_ZFA: u64 = 1 << 32; +const RISCV_HWPROBE_EXT_ZTSO: u64 = 1 << 33; +const RISCV_HWPROBE_EXT_ZACAS: u64 = 1 << 34; +const RISCV_HWPROBE_EXT_ZICOND: u64 = 1 << 35; +const RISCV_HWPROBE_EXT_ZIHINTPAUSE: u64 = 1 << 36; +const RISCV_HWPROBE_EXT_ZVE32X: u64 = 1 << 37; +const RISCV_HWPROBE_EXT_ZVE32F: u64 = 1 << 38; +const RISCV_HWPROBE_EXT_ZVE64X: u64 = 1 << 39; +const RISCV_HWPROBE_EXT_ZVE64F: u64 = 1 << 40; +const RISCV_HWPROBE_EXT_ZVE64D: u64 = 1 << 41; +const RISCV_HWPROBE_EXT_ZIMOP: u64 = 1 << 42; +const RISCV_HWPROBE_EXT_ZCA: u64 = 1 << 43; +const RISCV_HWPROBE_EXT_ZCB: u64 = 1 << 44; +const RISCV_HWPROBE_EXT_ZCD: u64 = 1 << 45; +const RISCV_HWPROBE_EXT_ZCF: u64 = 1 << 46; +const RISCV_HWPROBE_EXT_ZCMOP: u64 = 1 << 47; +const RISCV_HWPROBE_EXT_ZAWRS: u64 = 1 << 48; +// Excluded because it only reports the existence of `prctl`-based pointer masking control. +// const RISCV_HWPROBE_EXT_SUPM: u64 = 1 << 49; +const RISCV_HWPROBE_EXT_ZICNTR: u64 = 1 << 50; +const RISCV_HWPROBE_EXT_ZIHPM: u64 = 1 << 51; +const RISCV_HWPROBE_EXT_ZFBFMIN: u64 = 1 << 52; +const RISCV_HWPROBE_EXT_ZVFBFMIN: u64 = 1 << 53; +const RISCV_HWPROBE_EXT_ZVFBFWMA: u64 = 1 << 54; +const RISCV_HWPROBE_EXT_ZICBOM: u64 = 1 << 55; +const RISCV_HWPROBE_EXT_ZAAMO: u64 = 1 << 56; +const RISCV_HWPROBE_EXT_ZALRSC: u64 = 1 << 57; + +const RISCV_HWPROBE_KEY_CPUPERF_0: i64 = 5; +const RISCV_HWPROBE_MISALIGNED_FAST: u64 = 3; +const RISCV_HWPROBE_MISALIGNED_MASK: u64 = 7; + +const RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: i64 = 9; +const RISCV_HWPROBE_MISALIGNED_SCALAR_FAST: u64 = 3; + +const RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: i64 = 10; +const RISCV_HWPROBE_MISALIGNED_VECTOR_FAST: u64 = 3; + +// syscall returns an unsupported error if riscv_hwprobe is not supported, +// so we can safely use this function on older versions of Linux. +fn _riscv_hwprobe(out: &mut [riscv_hwprobe]) -> bool { + unsafe fn __riscv_hwprobe( + pairs: *mut riscv_hwprobe, + pair_count: libc::size_t, + cpu_set_size: libc::size_t, + cpus: *mut libc::c_ulong, + flags: libc::c_uint, + ) -> libc::c_long { + unsafe { + libc::syscall( + __NR_riscv_hwprobe, + pairs, + pair_count, + cpu_set_size, + cpus, + flags, + ) + } + } + + unsafe { __riscv_hwprobe(out.as_mut_ptr(), out.len(), 0, ptr::null_mut(), 0) == 0 } +} + +/// Read list of supported features from (1) the auxiliary vector +/// and (2) the results of `riscv_hwprobe` and `prctl` system calls. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let mut enable_feature = |feature, enable| { + if enable { + value.set(feature as u32); + } + }; + + // Use auxiliary vector to enable single-letter ISA extensions. + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/uapi/asm/hwcap.h?h=v6.15 + let auxv = auxvec::auxv().expect("read auxvec"); // should not fail on RISC-V platform + let mut has_i = bit::test(auxv.hwcap, (b'i' - b'a').into()); + #[allow(clippy::eq_op)] + enable_feature(Feature::a, bit::test(auxv.hwcap, (b'a' - b'a').into())); + enable_feature(Feature::c, bit::test(auxv.hwcap, (b'c' - b'a').into())); + enable_feature(Feature::d, bit::test(auxv.hwcap, (b'd' - b'a').into())); + enable_feature(Feature::f, bit::test(auxv.hwcap, (b'f' - b'a').into())); + enable_feature(Feature::m, bit::test(auxv.hwcap, (b'm' - b'a').into())); + let has_v = bit::test(auxv.hwcap, (b'v' - b'a').into()); + let mut is_v_set = false; + + // Use riscv_hwprobe syscall to query more extensions and + // performance-related capabilities. + 'hwprobe: { + macro_rules! init { + { $($name: ident : $key: expr),* $(,)? } => { + #[repr(usize)] + enum Indices { $($name),* } + let mut t = [$(riscv_hwprobe { key: $key, value: 0 }),*]; + macro_rules! data_mut { () => { &mut t } } + macro_rules! query { [$idx: ident] => { t[Indices::$idx as usize].get() } } + } + } + init! { + BaseBehavior: RISCV_HWPROBE_KEY_BASE_BEHAVIOR, + Extensions: RISCV_HWPROBE_KEY_IMA_EXT_0, + MisalignedScalarPerf: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF, + MisalignedVectorPerf: RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF, + MisalignedScalarPerfFallback: RISCV_HWPROBE_KEY_CPUPERF_0, + }; + if !_riscv_hwprobe(data_mut!()) { + break 'hwprobe; + } + + // Query scalar misaligned behavior. + if let Some(value) = query![MisalignedScalarPerf] { + enable_feature( + Feature::unaligned_scalar_mem, + value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST, + ); + } else if let Some(value) = query![MisalignedScalarPerfFallback] { + // Deprecated method for fallback + enable_feature( + Feature::unaligned_scalar_mem, + value & RISCV_HWPROBE_MISALIGNED_MASK == RISCV_HWPROBE_MISALIGNED_FAST, + ); + } + + // Query vector misaligned behavior. + if let Some(value) = query![MisalignedVectorPerf] { + enable_feature( + Feature::unaligned_vector_mem, + value == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST, + ); + } + + // Query whether "I" base and extensions "M" and "A" (as in the ISA + // manual version 2.2) are enabled. "I" base at that time corresponds + // to "I", "Zicsr", "Zicntr" and "Zifencei" (as in the ISA manual version + // 20240411). + // This is a current requirement of + // `RISCV_HWPROBE_KEY_IMA_EXT_0`-based tests. + if query![BaseBehavior].is_none_or(|value| value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA == 0) { + break 'hwprobe; + } + has_i = true; + enable_feature(Feature::zicsr, true); + enable_feature(Feature::zicntr, true); + enable_feature(Feature::zifencei, true); + enable_feature(Feature::m, true); + enable_feature(Feature::a, true); + + // Enable features based on `RISCV_HWPROBE_KEY_IMA_EXT_0`. + let Some(ima_ext_0) = query![Extensions] else { + break 'hwprobe; + }; + let test = |mask| (ima_ext_0 & mask) != 0; + + enable_feature(Feature::d, test(RISCV_HWPROBE_IMA_FD)); // F is implied. + enable_feature(Feature::c, test(RISCV_HWPROBE_IMA_C)); + + enable_feature(Feature::zicntr, test(RISCV_HWPROBE_EXT_ZICNTR)); + enable_feature(Feature::zihpm, test(RISCV_HWPROBE_EXT_ZIHPM)); + + enable_feature(Feature::zihintntl, test(RISCV_HWPROBE_EXT_ZIHINTNTL)); + enable_feature(Feature::zihintpause, test(RISCV_HWPROBE_EXT_ZIHINTPAUSE)); + enable_feature(Feature::zimop, test(RISCV_HWPROBE_EXT_ZIMOP)); + enable_feature(Feature::zicbom, test(RISCV_HWPROBE_EXT_ZICBOM)); + enable_feature(Feature::zicboz, test(RISCV_HWPROBE_EXT_ZICBOZ)); + enable_feature(Feature::zicond, test(RISCV_HWPROBE_EXT_ZICOND)); + + enable_feature(Feature::zalrsc, test(RISCV_HWPROBE_EXT_ZALRSC)); + enable_feature(Feature::zaamo, test(RISCV_HWPROBE_EXT_ZAAMO)); + enable_feature(Feature::zawrs, test(RISCV_HWPROBE_EXT_ZAWRS)); + enable_feature(Feature::zacas, test(RISCV_HWPROBE_EXT_ZACAS)); + enable_feature(Feature::ztso, test(RISCV_HWPROBE_EXT_ZTSO)); + + enable_feature(Feature::zba, test(RISCV_HWPROBE_EXT_ZBA)); + enable_feature(Feature::zbb, test(RISCV_HWPROBE_EXT_ZBB)); + enable_feature(Feature::zbs, test(RISCV_HWPROBE_EXT_ZBS)); + enable_feature(Feature::zbc, test(RISCV_HWPROBE_EXT_ZBC)); + + enable_feature(Feature::zbkb, test(RISCV_HWPROBE_EXT_ZBKB)); + enable_feature(Feature::zbkc, test(RISCV_HWPROBE_EXT_ZBKC)); + enable_feature(Feature::zbkx, test(RISCV_HWPROBE_EXT_ZBKX)); + enable_feature(Feature::zknd, test(RISCV_HWPROBE_EXT_ZKND)); + enable_feature(Feature::zkne, test(RISCV_HWPROBE_EXT_ZKNE)); + enable_feature(Feature::zknh, test(RISCV_HWPROBE_EXT_ZKNH)); + enable_feature(Feature::zksed, test(RISCV_HWPROBE_EXT_ZKSED)); + enable_feature(Feature::zksh, test(RISCV_HWPROBE_EXT_ZKSH)); + enable_feature(Feature::zkt, test(RISCV_HWPROBE_EXT_ZKT)); + + enable_feature(Feature::zcmop, test(RISCV_HWPROBE_EXT_ZCMOP)); + enable_feature(Feature::zca, test(RISCV_HWPROBE_EXT_ZCA)); + enable_feature(Feature::zcf, test(RISCV_HWPROBE_EXT_ZCF)); + enable_feature(Feature::zcd, test(RISCV_HWPROBE_EXT_ZCD)); + enable_feature(Feature::zcb, test(RISCV_HWPROBE_EXT_ZCB)); + + enable_feature(Feature::zfh, test(RISCV_HWPROBE_EXT_ZFH)); + enable_feature(Feature::zfhmin, test(RISCV_HWPROBE_EXT_ZFHMIN)); + enable_feature(Feature::zfa, test(RISCV_HWPROBE_EXT_ZFA)); + enable_feature(Feature::zfbfmin, test(RISCV_HWPROBE_EXT_ZFBFMIN)); + + // Use prctl (if any) to determine whether the vector extension + // is enabled on the current thread (assuming the entire process + // share the same status). If prctl fails (e.g. QEMU userland emulator + // as of version 9.2.3), use auxiliary vector to retrieve the default + // vector status on the process startup. + let has_vectors = { + let v_status = unsafe { libc::prctl(PR_RISCV_V_GET_CONTROL) }; + if v_status >= 0 { + (v_status & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) == PR_RISCV_V_VSTATE_CTRL_ON + } else { + has_v + } + }; + if has_vectors { + enable_feature(Feature::v, test(RISCV_HWPROBE_IMA_V)); + enable_feature(Feature::zve32x, test(RISCV_HWPROBE_EXT_ZVE32X)); + enable_feature(Feature::zve32f, test(RISCV_HWPROBE_EXT_ZVE32F)); + enable_feature(Feature::zve64x, test(RISCV_HWPROBE_EXT_ZVE64X)); + enable_feature(Feature::zve64f, test(RISCV_HWPROBE_EXT_ZVE64F)); + enable_feature(Feature::zve64d, test(RISCV_HWPROBE_EXT_ZVE64D)); + + enable_feature(Feature::zvbb, test(RISCV_HWPROBE_EXT_ZVBB)); + enable_feature(Feature::zvbc, test(RISCV_HWPROBE_EXT_ZVBC)); + enable_feature(Feature::zvkb, test(RISCV_HWPROBE_EXT_ZVKB)); + enable_feature(Feature::zvkg, test(RISCV_HWPROBE_EXT_ZVKG)); + enable_feature(Feature::zvkned, test(RISCV_HWPROBE_EXT_ZVKNED)); + enable_feature(Feature::zvknha, test(RISCV_HWPROBE_EXT_ZVKNHA)); + enable_feature(Feature::zvknhb, test(RISCV_HWPROBE_EXT_ZVKNHB)); + enable_feature(Feature::zvksed, test(RISCV_HWPROBE_EXT_ZVKSED)); + enable_feature(Feature::zvksh, test(RISCV_HWPROBE_EXT_ZVKSH)); + enable_feature(Feature::zvkt, test(RISCV_HWPROBE_EXT_ZVKT)); + + enable_feature(Feature::zvfh, test(RISCV_HWPROBE_EXT_ZVFH)); + enable_feature(Feature::zvfhmin, test(RISCV_HWPROBE_EXT_ZVFHMIN)); + enable_feature(Feature::zvfbfmin, test(RISCV_HWPROBE_EXT_ZVFBFMIN)); + enable_feature(Feature::zvfbfwma, test(RISCV_HWPROBE_EXT_ZVFBFWMA)); + } + is_v_set = true; + }; + + // Set V purely depending on the auxiliary vector + // only if no fine-grained vector extension detection is available. + if !is_v_set { + enable_feature(Feature::v, has_v); + } + + // Handle base ISA. + // If future RV128I is supported, implement with `enable_feature` here. + // Note that we should use `target_arch` instead of `target_pointer_width` + // to avoid misdetection caused by experimental ABIs such as RV64ILP32. + #[cfg(target_arch = "riscv64")] + enable_feature(Feature::rv64i, has_i); + #[cfg(target_arch = "riscv32")] + enable_feature(Feature::rv32i, has_i); + + imply_features(value) +} diff --git a/library/std_detect/src/detect/os/linux/s390x.rs b/library/std_detect/src/detect/os/linux/s390x.rs new file mode 100644 index 00000000000..9b53f526d61 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/s390x.rs @@ -0,0 +1,152 @@ +//! Run-time feature detection for s390x on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector +pub(crate) fn detect_features() -> cache::Initializer { + let opt_hwcap: Option<AtHwcap> = auxvec::auxv().ok().map(Into::into); + let facilities = ExtendedFacilityList::new(); + cache(opt_hwcap, facilities) +} + +#[derive(Debug, Default, PartialEq)] +struct AtHwcap { + esan3: bool, + zarch: bool, + stfle: bool, + msa: bool, + ldisp: bool, + eimm: bool, + dfp: bool, + hpage: bool, + etf3eh: bool, + high_gprs: bool, + te: bool, + vxrs: bool, + vxrs_bcd: bool, + vxrs_ext: bool, + gs: bool, + vxrs_ext2: bool, + vxrs_pde: bool, + sort: bool, + dflt: bool, + vxrs_pde2: bool, + nnpa: bool, + pci_mio: bool, + sie: bool, +} + +impl From<auxvec::AuxVec> for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. + fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + esan3: bit::test(auxv.hwcap, 0), + zarch: bit::test(auxv.hwcap, 1), + stfle: bit::test(auxv.hwcap, 2), + msa: bit::test(auxv.hwcap, 3), + ldisp: bit::test(auxv.hwcap, 4), + eimm: bit::test(auxv.hwcap, 5), + dfp: bit::test(auxv.hwcap, 6), + hpage: bit::test(auxv.hwcap, 7), + etf3eh: bit::test(auxv.hwcap, 8), + high_gprs: bit::test(auxv.hwcap, 9), + te: bit::test(auxv.hwcap, 10), + vxrs: bit::test(auxv.hwcap, 11), + vxrs_bcd: bit::test(auxv.hwcap, 12), + vxrs_ext: bit::test(auxv.hwcap, 13), + gs: bit::test(auxv.hwcap, 14), + vxrs_ext2: bit::test(auxv.hwcap, 15), + vxrs_pde: bit::test(auxv.hwcap, 16), + sort: bit::test(auxv.hwcap, 17), + dflt: bit::test(auxv.hwcap, 18), + vxrs_pde2: bit::test(auxv.hwcap, 19), + nnpa: bit::test(auxv.hwcap, 20), + pci_mio: bit::test(auxv.hwcap, 21), + sie: bit::test(auxv.hwcap, 22), + } + } +} + +struct ExtendedFacilityList([u64; 4]); + +impl ExtendedFacilityList { + fn new() -> Self { + let mut result: [u64; 4] = [0; 4]; + // SAFETY: rust/llvm only support s390x version with the `stfle` instruction. + unsafe { + core::arch::asm!( + // equivalently ".insn s, 0xb2b00000, 0({1})", + "stfle 0({})", + in(reg_addr) result.as_mut_ptr() , + inout("r0") result.len() as u64 - 1 => _, + options(nostack) + ); + } + Self(result) + } + + const fn get_bit(&self, n: usize) -> bool { + // NOTE: bits are numbered from the left. + self.0[n / 64] & (1 << (63 - (n % 64))) != 0 + } +} + +/// Initializes the cache from the feature bits. +/// +/// These values are part of the platform-specific [asm/elf.h][kernel], and are a selection of the +/// fields found in the [Facility Indications]. +/// +/// [Facility Indications]: https://www.ibm.com/support/pages/sites/default/files/2021-05/SA22-7871-10.pdf#page=63 +/// [kernel]: https://github.com/torvalds/linux/blob/b62cef9a5c673f1b8083159f5dc03c1c5daced2f/arch/s390/include/asm/elf.h#L129 +fn cache(hwcap: Option<AtHwcap>, facilities: ExtendedFacilityList) -> cache::Initializer { + let mut value = cache::Initializer::default(); + + { + let mut enable_if_set = |bit_index, f| { + if facilities.get_bit(bit_index) { + value.set(f as u32); + } + }; + + // We use HWCAP for `vector` because it requires both hardware and kernel support. + if let Some(AtHwcap { vxrs: true, .. }) = hwcap { + // vector and related + + enable_if_set(129, Feature::vector); + + enable_if_set(135, Feature::vector_enhancements_1); + enable_if_set(148, Feature::vector_enhancements_2); + enable_if_set(198, Feature::vector_enhancements_3); + + enable_if_set(134, Feature::vector_packed_decimal); + enable_if_set(152, Feature::vector_packed_decimal_enhancement); + enable_if_set(192, Feature::vector_packed_decimal_enhancement_2); + enable_if_set(199, Feature::vector_packed_decimal_enhancement_3); + + enable_if_set(165, Feature::nnp_assist); + } + + // others + + enable_if_set(76, Feature::message_security_assist_extension3); + enable_if_set(77, Feature::message_security_assist_extension4); + enable_if_set(57, Feature::message_security_assist_extension5); + enable_if_set(146, Feature::message_security_assist_extension8); + enable_if_set(155, Feature::message_security_assist_extension9); + enable_if_set(86, Feature::message_security_assist_extension12); + + enable_if_set(58, Feature::miscellaneous_extensions_2); + enable_if_set(61, Feature::miscellaneous_extensions_3); + enable_if_set(84, Feature::miscellaneous_extensions_4); + + enable_if_set(45, Feature::high_word); + enable_if_set(73, Feature::transactional_execution); + enable_if_set(133, Feature::guarded_storage); + enable_if_set(150, Feature::enhanced_sort); + enable_if_set(151, Feature::deflate_conversion); + enable_if_set(201, Feature::concurrent_functions); + } + + value +} |
