diff options
Diffstat (limited to 'library/std_detect/src/detect/os/linux/aarch64.rs')
| -rw-r--r-- | library/std_detect/src/detect/os/linux/aarch64.rs | 484 |
1 files changed, 484 insertions, 0 deletions
diff --git a/library/std_detect/src/detect/os/linux/aarch64.rs b/library/std_detect/src/detect/os/linux/aarch64.rs new file mode 100644 index 00000000000..22a9cefff7b --- /dev/null +++ b/library/std_detect/src/detect/os/linux/aarch64.rs @@ -0,0 +1,484 @@ +//! Run-time feature detection for Aarch64 on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + #[cfg(target_os = "android")] + let is_exynos9810 = { + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // https://reviews.llvm.org/D114523 + let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize]; + let len = unsafe { + libc::__system_property_get(c"ro.arch".as_ptr(), arch.as_mut_ptr() as *mut libc::c_char) + }; + // On Exynos, ro.arch is not available on Android 12+, but it is fine + // because Android 9+ includes the fix. + len > 0 && arch.starts_with(b"exynos9810") + }; + #[cfg(not(target_os = "android"))] + let is_exynos9810 = false; + + if let Ok(auxv) = auxvec::auxv() { + let hwcap: AtHwcap = auxv.into(); + return hwcap.cache(is_exynos9810); + } + cache::Initializer::default() +} + +/// These values are part of the platform-specific [asm/hwcap.h][hwcap] . +/// +/// The names match those used for cpuinfo. +/// +/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +#[derive(Debug, Default, PartialEq)] +struct AtHwcap { + // AT_HWCAP + fp: bool, + asimd: bool, + // evtstrm: No LLVM support. + aes: bool, + pmull: bool, + sha1: bool, + sha2: bool, + crc32: bool, + atomics: bool, + fphp: bool, + asimdhp: bool, + // cpuid: No LLVM support. + asimdrdm: bool, + jscvt: bool, + fcma: bool, + lrcpc: bool, + dcpop: bool, + sha3: bool, + sm3: bool, + sm4: bool, + asimddp: bool, + sha512: bool, + sve: bool, + fhm: bool, + dit: bool, + uscat: bool, + ilrcpc: bool, + flagm: bool, + ssbs: bool, + sb: bool, + paca: bool, + pacg: bool, + + // AT_HWCAP2 + dcpodp: bool, + sve2: bool, + sveaes: bool, + svepmull: bool, + svebitperm: bool, + svesha3: bool, + svesm4: bool, + flagm2: bool, + frint: bool, + // svei8mm: See i8mm feature. + svef32mm: bool, + svef64mm: bool, + // svebf16: See bf16 feature. + i8mm: bool, + bf16: bool, + // dgh: No LLVM support. + rng: bool, + bti: bool, + mte: bool, + ecv: bool, + // afp: bool, + // rpres: bool, + // mte3: bool, + sme: bool, + smei16i64: bool, + smef64f64: bool, + // smei8i32: bool, + // smef16f32: bool, + // smeb16f32: bool, + // smef32f32: bool, + smefa64: bool, + wfxt: bool, + // ebf16: bool, + // sveebf16: bool, + cssc: bool, + // rprfm: bool, + sve2p1: bool, + sme2: bool, + sme2p1: bool, + // smei16i32: bool, + // smebi32i32: bool, + smeb16b16: bool, + smef16f16: bool, + mops: bool, + hbc: bool, + sveb16b16: bool, + lrcpc3: bool, + lse128: bool, + fpmr: bool, + lut: bool, + faminmax: bool, + f8cvt: bool, + f8fma: bool, + f8dp4: bool, + f8dp2: bool, + f8e4m3: bool, + f8e5m2: bool, + smelutv2: bool, + smef8f16: bool, + smef8f32: bool, + smesf8fma: bool, + smesf8dp4: bool, + smesf8dp2: bool, + // pauthlr: bool, +} + +impl From<auxvec::AuxVec> for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. + fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + fp: bit::test(auxv.hwcap, 0), + asimd: bit::test(auxv.hwcap, 1), + // evtstrm: bit::test(auxv.hwcap, 2), + aes: bit::test(auxv.hwcap, 3), + pmull: bit::test(auxv.hwcap, 4), + sha1: bit::test(auxv.hwcap, 5), + sha2: bit::test(auxv.hwcap, 6), + crc32: bit::test(auxv.hwcap, 7), + atomics: bit::test(auxv.hwcap, 8), + fphp: bit::test(auxv.hwcap, 9), + asimdhp: bit::test(auxv.hwcap, 10), + // cpuid: bit::test(auxv.hwcap, 11), + asimdrdm: bit::test(auxv.hwcap, 12), + jscvt: bit::test(auxv.hwcap, 13), + fcma: bit::test(auxv.hwcap, 14), + lrcpc: bit::test(auxv.hwcap, 15), + dcpop: bit::test(auxv.hwcap, 16), + sha3: bit::test(auxv.hwcap, 17), + sm3: bit::test(auxv.hwcap, 18), + sm4: bit::test(auxv.hwcap, 19), + asimddp: bit::test(auxv.hwcap, 20), + sha512: bit::test(auxv.hwcap, 21), + sve: bit::test(auxv.hwcap, 22), + fhm: bit::test(auxv.hwcap, 23), + dit: bit::test(auxv.hwcap, 24), + uscat: bit::test(auxv.hwcap, 25), + ilrcpc: bit::test(auxv.hwcap, 26), + flagm: bit::test(auxv.hwcap, 27), + ssbs: bit::test(auxv.hwcap, 28), + sb: bit::test(auxv.hwcap, 29), + paca: bit::test(auxv.hwcap, 30), + pacg: bit::test(auxv.hwcap, 31), + + // AT_HWCAP2 + dcpodp: bit::test(auxv.hwcap2, 0), + sve2: bit::test(auxv.hwcap2, 1), + sveaes: bit::test(auxv.hwcap2, 2), + svepmull: bit::test(auxv.hwcap2, 3), + svebitperm: bit::test(auxv.hwcap2, 4), + svesha3: bit::test(auxv.hwcap2, 5), + svesm4: bit::test(auxv.hwcap2, 6), + flagm2: bit::test(auxv.hwcap2, 7), + frint: bit::test(auxv.hwcap2, 8), + // svei8mm: bit::test(auxv.hwcap2, 9), + svef32mm: bit::test(auxv.hwcap2, 10), + svef64mm: bit::test(auxv.hwcap2, 11), + // svebf16: bit::test(auxv.hwcap2, 12), + i8mm: bit::test(auxv.hwcap2, 13), + bf16: bit::test(auxv.hwcap2, 14), + // dgh: bit::test(auxv.hwcap2, 15), + rng: bit::test(auxv.hwcap2, 16), + bti: bit::test(auxv.hwcap2, 17), + mte: bit::test(auxv.hwcap2, 18), + ecv: bit::test(auxv.hwcap2, 19), + // afp: bit::test(auxv.hwcap2, 20), + // rpres: bit::test(auxv.hwcap2, 21), + // mte3: bit::test(auxv.hwcap2, 22), + sme: bit::test(auxv.hwcap2, 23), + smei16i64: bit::test(auxv.hwcap2, 24), + smef64f64: bit::test(auxv.hwcap2, 25), + // smei8i32: bit::test(auxv.hwcap2, 26), + // smef16f32: bit::test(auxv.hwcap2, 27), + // smeb16f32: bit::test(auxv.hwcap2, 28), + // smef32f32: bit::test(auxv.hwcap2, 29), + smefa64: bit::test(auxv.hwcap2, 30), + wfxt: bit::test(auxv.hwcap2, 31), + // ebf16: bit::test(auxv.hwcap2, 32), + // sveebf16: bit::test(auxv.hwcap2, 33), + cssc: bit::test(auxv.hwcap2, 34), + // rprfm: bit::test(auxv.hwcap2, 35), + sve2p1: bit::test(auxv.hwcap2, 36), + sme2: bit::test(auxv.hwcap2, 37), + sme2p1: bit::test(auxv.hwcap2, 38), + // smei16i32: bit::test(auxv.hwcap2, 39), + // smebi32i32: bit::test(auxv.hwcap2, 40), + smeb16b16: bit::test(auxv.hwcap2, 41), + smef16f16: bit::test(auxv.hwcap2, 42), + mops: bit::test(auxv.hwcap2, 43), + hbc: bit::test(auxv.hwcap2, 44), + sveb16b16: bit::test(auxv.hwcap2, 45), + lrcpc3: bit::test(auxv.hwcap2, 46), + lse128: bit::test(auxv.hwcap2, 47), + fpmr: bit::test(auxv.hwcap2, 48), + lut: bit::test(auxv.hwcap2, 49), + faminmax: bit::test(auxv.hwcap2, 50), + f8cvt: bit::test(auxv.hwcap2, 51), + f8fma: bit::test(auxv.hwcap2, 52), + f8dp4: bit::test(auxv.hwcap2, 53), + f8dp2: bit::test(auxv.hwcap2, 54), + f8e4m3: bit::test(auxv.hwcap2, 55), + f8e5m2: bit::test(auxv.hwcap2, 56), + smelutv2: bit::test(auxv.hwcap2, 57), + smef8f16: bit::test(auxv.hwcap2, 58), + smef8f32: bit::test(auxv.hwcap2, 59), + smesf8fma: bit::test(auxv.hwcap2, 60), + smesf8dp4: bit::test(auxv.hwcap2, 61), + smesf8dp2: bit::test(auxv.hwcap2, 62), + // pauthlr: bit::test(auxv.hwcap2, ??), + } + } +} + +impl AtHwcap { + /// Initializes the cache from the feature -bits. + /// + /// The feature dependencies here come directly from LLVM's feature definitions: + /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td + fn cache(self, is_exynos9810: bool) -> cache::Initializer { + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // So, only check features that are known to be available on exynos-m3: + // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature + // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342. + if is_exynos9810 { + enable_feature(Feature::fp, self.fp); + enable_feature(Feature::crc, self.crc32); + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // Cryptographic extensions require ASIMD + // AES also covers FEAT_PMULL + enable_feature(Feature::aes, self.aes && self.pmull && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + return value; + } + + enable_feature(Feature::fp, self.fp); + // Half-float support requires float support + enable_feature(Feature::fp16, self.fp && self.fphp); + // FHM (fp16fml in LLVM) requires half float support + enable_feature(Feature::fhm, self.fphp && self.fhm); + enable_feature(Feature::pmull, self.pmull); + enable_feature(Feature::crc, self.crc32); + enable_feature(Feature::lse, self.atomics); + enable_feature(Feature::lse2, self.uscat); + enable_feature(Feature::lse128, self.lse128 && self.atomics); + enable_feature(Feature::rcpc, self.lrcpc); + // RCPC2 (rcpc-immo in LLVM) requires RCPC support + let rcpc2 = self.ilrcpc && self.lrcpc; + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2); + enable_feature(Feature::dit, self.dit); + enable_feature(Feature::flagm, self.flagm); + enable_feature(Feature::flagm2, self.flagm2); + enable_feature(Feature::ssbs, self.ssbs); + enable_feature(Feature::sb, self.sb); + enable_feature(Feature::paca, self.paca); + enable_feature(Feature::pacg, self.pacg); + // enable_feature(Feature::pauth_lr, self.pauthlr); + enable_feature(Feature::dpb, self.dcpop); + enable_feature(Feature::dpb2, self.dcpodp); + enable_feature(Feature::rand, self.rng); + enable_feature(Feature::bti, self.bti); + enable_feature(Feature::mte, self.mte); + // jsconv requires float support + enable_feature(Feature::jsconv, self.jscvt && self.fp); + enable_feature(Feature::rdm, self.asimdrdm); + enable_feature(Feature::dotprod, self.asimddp); + enable_feature(Feature::frintts, self.frint); + + // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes + // separately. We ignore that distinction here. + enable_feature(Feature::i8mm, self.i8mm); + enable_feature(Feature::bf16, self.bf16); + + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // ASIMD extensions require ASIMD support: + enable_feature(Feature::fcma, self.fcma && asimd); + enable_feature(Feature::sve, self.sve && asimd); + + // SVE extensions require SVE & ASIMD + enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd); + enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd); + + // Cryptographic extensions require ASIMD + enable_feature(Feature::aes, self.aes && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + // SHA512/SHA3 require SHA1 & SHA256 + enable_feature( + Feature::sha3, + self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd, + ); + enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd); + + // SVE2 requires SVE + let sve2 = self.sve2 && self.sve && asimd; + enable_feature(Feature::sve2, sve2); + enable_feature(Feature::sve2p1, self.sve2p1 && sve2); + // SVE2 extensions require SVE2 and crypto features + enable_feature( + Feature::sve2_aes, + self.sveaes && self.svepmull && sve2 && self.aes, + ); + enable_feature( + Feature::sve2_sm4, + self.svesm4 && sve2 && self.sm3 && self.sm4, + ); + enable_feature( + Feature::sve2_sha3, + self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2, + ); + enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2); + enable_feature(Feature::sve_b16b16, self.bf16 && self.sveb16b16); + enable_feature(Feature::hbc, self.hbc); + enable_feature(Feature::mops, self.mops); + enable_feature(Feature::ecv, self.ecv); + enable_feature(Feature::lut, self.lut); + enable_feature(Feature::cssc, self.cssc); + enable_feature(Feature::fpmr, self.fpmr); + enable_feature(Feature::faminmax, self.faminmax); + let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16; + enable_feature(Feature::fp8, fp8); + let fp8fma = self.f8fma && fp8; + enable_feature(Feature::fp8fma, fp8fma); + let fp8dot4 = self.f8dp4 && fp8fma; + enable_feature(Feature::fp8dot4, fp8dot4); + enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4); + enable_feature(Feature::wfxt, self.wfxt); + let sme = self.sme && self.bf16; + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme_i16i64, self.smei16i64 && sme); + enable_feature(Feature::sme_f64f64, self.smef64f64 && sme); + enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2); + let sme2 = self.sme2 && sme; + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme2p1, self.sme2p1 && sme2); + enable_feature( + Feature::sme_b16b16, + sme2 && self.bf16 && self.sveb16b16 && self.smeb16b16, + ); + enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2); + enable_feature(Feature::sme_lutv2, self.smelutv2); + let sme_f8f32 = self.smef8f32 && sme2 && fp8; + enable_feature(Feature::sme_f8f32, sme_f8f32); + enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32); + let ssve_fp8fma = self.smesf8fma && sme2 && fp8; + enable_feature(Feature::ssve_fp8fma, ssve_fp8fma); + let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma; + enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4); + enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4); + } + value + } +} + +#[cfg(target_endian = "little")] +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(feature = "std_detect_file_io")] + mod auxv_from_file { + use super::auxvec::auxv_from_file; + use super::*; + // The baseline hwcaps used in the (artificial) auxv test files. + fn baseline_hwcaps() -> AtHwcap { + AtHwcap { + fp: true, + asimd: true, + aes: true, + pmull: true, + sha1: true, + sha2: true, + crc32: true, + atomics: true, + fphp: true, + asimdhp: true, + asimdrdm: true, + lrcpc: true, + dcpop: true, + asimddp: true, + ssbs: true, + ..AtHwcap::default() + } + } + + #[test] + fn linux_empty_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_no_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!( + AtHwcap::from(v), + AtHwcap { + // Some other HWCAP bits. + paca: true, + pacg: true, + // HWCAP2-only bits. + dcpodp: true, + frint: true, + rng: true, + bti: true, + mte: true, + ..baseline_hwcaps() + } + ); + } + } +} |
