diff options
| author | Jakub Beránek <berykubik@gmail.com> | 2025-07-04 09:26:12 +0200 |
|---|---|---|
| committer | Jakub Beránek <berykubik@gmail.com> | 2025-07-22 20:17:06 +0200 |
| commit | 5b2de8ab27e4a319e23817b61830a5cc6fd1745e (patch) | |
| tree | 94d6ce5680690c678e56d907ec27145fb464eeed /library/std_detect/src/detect/os | |
| parent | 2e5367566819ca7878baa9600ae7a93eb0e37bbf (diff) | |
| download | rust-5b2de8ab27e4a319e23817b61830a5cc6fd1745e.tar.gz rust-5b2de8ab27e4a319e23817b61830a5cc6fd1745e.zip | |
Move `std_detect` from `library/stdarch` to `library`
Diffstat (limited to 'library/std_detect/src/detect/os')
21 files changed, 2693 insertions, 0 deletions
diff --git a/library/std_detect/src/detect/os/aarch64.rs b/library/std_detect/src/detect/os/aarch64.rs new file mode 100644 index 00000000000..1ff2a17e6e1 --- /dev/null +++ b/library/std_detect/src/detect/os/aarch64.rs @@ -0,0 +1,130 @@ +//! Run-time feature detection for Aarch64 on any OS that emulates the mrs instruction. +//! +//! On FreeBSD >= 12.0, Linux >= 4.11 and other operating systems, it is possible to use +//! privileged system registers from userspace to check CPU feature support. +//! +//! AArch64 system registers ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1, ID_AA64ISAR1_EL1 +//! have bits dedicated to features like AdvSIMD, CRC32, AES, atomics (LSE), etc. +//! Each part of the register indicates the level of support for a certain feature, e.g. +//! when ID_AA64ISAR0_EL1\[7:4\] is >= 1, AES is supported; when it's >= 2, PMULL is supported. +//! +//! For proper support of [SoCs where different cores have different capabilities](https://medium.com/@jadr2ddude/a-big-little-problem-a-tale-of-big-little-gone-wrong-e7778ce744bb), +//! the OS has to always report only the features supported by all cores, like [FreeBSD does](https://reviews.freebsd.org/D17137#393947). +//! +//! References: +//! +//! - [Zircon implementation](https://fuchsia.googlesource.com/zircon/+/master/kernel/arch/arm64/feature.cpp) +//! - [Linux documentation](https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt) +//! - [ARM documentation](https://developer.arm.com/documentation/ddi0601/2022-12/AArch64-Registers?lang=en) + +use crate::detect::{Feature, cache}; +use core::arch::asm; + +/// Try to read the features from the system registers. +/// +/// This will cause SIGILL if the current OS is not trapping the mrs instruction. 
+pub(crate) fn detect_features() -> cache::Initializer { + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + let aa64isar0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR0_EL1", + out(reg) aa64isar0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + let aa64isar1: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR1_EL1", + out(reg) aa64isar1, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + let aa64mmfr2: u64; + unsafe { + asm!( + "mrs {}, ID_AA64MMFR2_EL1", + out(reg) aa64mmfr2, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + let aa64pfr0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64PFR0_EL1", + out(reg) aa64pfr0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, Some(aa64pfr0)) +} + +pub(crate) fn parse_system_registers( + aa64isar0: u64, + aa64isar1: u64, + aa64mmfr2: u64, + aa64pfr0: Option<u64>, +) -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + enable_feature(Feature::pmull, bits_shift(aa64isar0, 7, 4) >= 2); + enable_feature(Feature::tme, bits_shift(aa64isar0, 27, 24) == 1); + enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 2); + enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + if let Some(aa64pfr0) = aa64pfr0 { + let fp = bits_shift(aa64pfr0, 19, 16) < 0xF; + let fphp = bits_shift(aa64pfr0, 19, 16) >= 1; + let asimd = bits_shift(aa64pfr0, 23, 20) < 0xF; + let asimdhp = bits_shift(aa64pfr0, 23, 20) >= 1; + enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fphp); + // SIMD support requires float 
support - if half-floats are + // supported, it also requires half-float support: + enable_feature(Feature::asimd, fp && asimd && (!fphp | asimdhp)); + // SIMD extensions require SIMD support: + enable_feature(Feature::aes, asimd && bits_shift(aa64isar0, 7, 4) >= 2); + let sha1 = bits_shift(aa64isar0, 11, 8) >= 1; + let sha2 = bits_shift(aa64isar0, 15, 12) >= 1; + enable_feature(Feature::sha2, asimd && sha1 && sha2); + enable_feature(Feature::rdm, asimd && bits_shift(aa64isar0, 31, 28) >= 1); + enable_feature( + Feature::dotprod, + asimd && bits_shift(aa64isar0, 47, 44) >= 1, + ); + enable_feature(Feature::sve, asimd && bits_shift(aa64pfr0, 35, 32) >= 1); + } + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + // Check for either APA or API field + enable_feature(Feature::paca, bits_shift(aa64isar1, 11, 4) >= 1); + enable_feature(Feature::rcpc, bits_shift(aa64isar1, 23, 20) >= 1); + // Check for either GPA or GPI field + enable_feature(Feature::pacg, bits_shift(aa64isar1, 31, 24) >= 1); + + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + enable_feature(Feature::lse2, bits_shift(aa64mmfr2, 35, 32) >= 1); + + value +} + +#[inline] +fn bits_shift(x: u64, high: usize, low: usize) -> u64 { + (x >> low) & ((1 << (high - low + 1)) - 1) +} diff --git a/library/std_detect/src/detect/os/darwin/aarch64.rs b/library/std_detect/src/detect/os/darwin/aarch64.rs new file mode 100644 index 00000000000..44d921689e5 --- /dev/null +++ b/library/std_detect/src/detect/os/darwin/aarch64.rs @@ -0,0 +1,155 @@ +//! Run-time feature detection for aarch64 on Darwin (macOS/iOS/tvOS/watchOS/visionOS). +//! +//! 
<https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics> + +use crate::detect::{Feature, cache}; +use core::ffi::CStr; + +#[inline] +fn _sysctlbyname(name: &CStr) -> bool { + use libc; + + let mut enabled: i32 = 0; + let mut enabled_len: usize = 4; + let enabled_ptr = &mut enabled as *mut i32 as *mut libc::c_void; + + let ret = unsafe { + libc::sysctlbyname( + name.as_ptr(), + enabled_ptr, + &mut enabled_len, + core::ptr::null_mut(), + 0, + ) + }; + + match ret { + 0 => enabled != 0, + _ => false, + } +} + +/// Try to read the features using sysctlbyname. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Armv8.0 features not using the standard identifiers + let fp = _sysctlbyname(c"hw.optional.floatingpoint"); + let asimd = _sysctlbyname(c"hw.optional.AdvSIMD"); + let crc = _sysctlbyname(c"hw.optional.armv8_crc32"); + + // Armv8 and Armv9 features using the standard identifiers + let aes = _sysctlbyname(c"hw.optional.arm.FEAT_AES"); + let bf16 = _sysctlbyname(c"hw.optional.arm.FEAT_BF16"); + let bti = _sysctlbyname(c"hw.optional.arm.FEAT_BTI"); + let cssc = _sysctlbyname(c"hw.optional.arm.FEAT_CSSC"); + let dit = _sysctlbyname(c"hw.optional.arm.FEAT_DIT"); + let dpb = _sysctlbyname(c"hw.optional.arm.FEAT_DPB"); + let dpb2 = _sysctlbyname(c"hw.optional.arm.FEAT_DPB2"); + let dotprod = _sysctlbyname(c"hw.optional.arm.FEAT_DotProd"); + let ecv = _sysctlbyname(c"hw.optional.arm.FEAT_ECV"); + let fcma = _sysctlbyname(c"hw.optional.arm.FEAT_FCMA"); + let fhm = _sysctlbyname(c"hw.optional.arm.FEAT_FHM"); + let fp16 = _sysctlbyname(c"hw.optional.arm.FEAT_FP16"); + let frintts = _sysctlbyname(c"hw.optional.arm.FEAT_FRINTTS"); + let flagm = _sysctlbyname(c"hw.optional.arm.FEAT_FlagM"); + let flagm2 = _sysctlbyname(c"hw.optional.arm.FEAT_FlagM2"); + let hbc = 
_sysctlbyname(c"hw.optional.arm.FEAT_HBC"); + let i8mm = _sysctlbyname(c"hw.optional.arm.FEAT_I8MM"); + let jsconv = _sysctlbyname(c"hw.optional.arm.FEAT_JSCVT"); + let rcpc = _sysctlbyname(c"hw.optional.arm.FEAT_LRCPC"); + let rcpc2 = _sysctlbyname(c"hw.optional.arm.FEAT_LRCPC2"); + let lse = _sysctlbyname(c"hw.optional.arm.FEAT_LSE"); + let lse2 = _sysctlbyname(c"hw.optional.arm.FEAT_LSE2"); + let pauth = _sysctlbyname(c"hw.optional.arm.FEAT_PAuth"); + let pmull = _sysctlbyname(c"hw.optional.arm.FEAT_PMULL"); + let rdm = _sysctlbyname(c"hw.optional.arm.FEAT_RDM"); + let sb = _sysctlbyname(c"hw.optional.arm.FEAT_SB"); + let sha1 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA1"); + let sha256 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA256"); + let sha3 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA3"); + let sha512 = _sysctlbyname(c"hw.optional.arm.FEAT_SHA512"); + let sme = _sysctlbyname(c"hw.optional.arm.FEAT_SME"); + let sme2 = _sysctlbyname(c"hw.optional.arm.FEAT_SME2"); + let sme_f64f64 = _sysctlbyname(c"hw.optional.arm.FEAT_SME_F64F64"); + let sme_i16i64 = _sysctlbyname(c"hw.optional.arm.FEAT_SME_I16I64"); + let ssbs = _sysctlbyname(c"hw.optional.arm.FEAT_SSBS"); + let wfxt = _sysctlbyname(c"hw.optional.arm.FEAT_WFxT"); + + // The following features are not exposed by `is_aarch64_feature_detected`, + // but *are* reported by `sysctl`. They are here as documentation that they + // exist, and may potentially be exposed later. 
+ /* + let afp = _sysctlbyname(c"hw.optional.arm.FEAT_AFP"); + let csv2 = _sysctlbyname(c"hw.optional.arm.FEAT_CSV2"); + let csv3 = _sysctlbyname(c"hw.optional.arm.FEAT_CSV3"); + let ebf16 = _sysctlbyname(c"hw.optional.arm.FEAT_EBF16"); + let fpac = _sysctlbyname(c"hw.optional.arm.FEAT_FPAC"); + let fpaccombine = _sysctlbyname(c"hw.optional.arm.FEAT_FPACCOMBINE"); + let pacimp = _sysctlbyname(c"hw.optional.arm.FEAT_PACIMP"); + let pauth2 = _sysctlbyname(c"hw.optional.arm.FEAT_PAuth2"); + let rpres = _sysctlbyname(c"hw.optional.arm.FEAT_RPRES"); + let specres = _sysctlbyname(c"hw.optional.arm.FEAT_SPECRES"); + let specres2 = _sysctlbyname(c"hw.optional.arm.FEAT_SPECRES2"); + */ + + // The following "features" are reported by `sysctl` but are mandatory parts + // of SME or SME2, and so are not exposed separately by + // `is_aarch64_feature_detected`. They are here to document their + // existence, in case they're needed in the future. + /* + let sme_b16f32 = _sysctlbyname(c"hw.optional.arm.SME_B16F32"); + let sme_bi32i32 = _sysctlbyname(c"hw.optional.arm.SME_BI32I32"); + let sme_f16f32 = _sysctlbyname(c"hw.optional.arm.SME_F16F32"); + let sme_f32f32 = _sysctlbyname(c"hw.optional.arm.SME_F32F32"); + let sme_i16i32 = _sysctlbyname(c"hw.optional.arm.SME_I16I32"); + let sme_i8i32 = _sysctlbyname(c"hw.optional.arm.SME_I8I32"); + */ + + enable_feature(Feature::aes, aes && pmull); + enable_feature(Feature::asimd, asimd); + enable_feature(Feature::bf16, bf16); + enable_feature(Feature::bti, bti); + enable_feature(Feature::crc, crc); + enable_feature(Feature::cssc, cssc); + enable_feature(Feature::dit, dit); + enable_feature(Feature::dotprod, dotprod); + enable_feature(Feature::dpb, dpb); + enable_feature(Feature::dpb2, dpb2); + enable_feature(Feature::ecv, ecv); + enable_feature(Feature::fcma, fcma); + enable_feature(Feature::fhm, fhm); + enable_feature(Feature::flagm, flagm); + enable_feature(Feature::flagm2, flagm2); + enable_feature(Feature::fp, fp); + 
enable_feature(Feature::fp16, fp16); + enable_feature(Feature::frintts, frintts); + enable_feature(Feature::hbc, hbc); + enable_feature(Feature::i8mm, i8mm); + enable_feature(Feature::jsconv, jsconv); + enable_feature(Feature::lse, lse); + enable_feature(Feature::lse2, lse2); + enable_feature(Feature::paca, pauth); + enable_feature(Feature::pacg, pauth); + enable_feature(Feature::pmull, aes && pmull); + enable_feature(Feature::rcpc, rcpc); + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rdm, rdm); + enable_feature(Feature::sb, sb); + enable_feature(Feature::sha2, sha1 && sha256 && asimd); + enable_feature(Feature::sha3, sha512 && sha3 && asimd); + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme_f64f64, sme_f64f64); + enable_feature(Feature::sme_i16i64, sme_i16i64); + enable_feature(Feature::ssbs, ssbs); + enable_feature(Feature::wfxt, wfxt); + + value +} diff --git a/library/std_detect/src/detect/os/freebsd/aarch64.rs b/library/std_detect/src/detect/os/freebsd/aarch64.rs new file mode 100644 index 00000000000..ccc48f53605 --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/aarch64.rs @@ -0,0 +1,3 @@ +//! Run-time feature detection for Aarch64 on FreeBSD. + +pub(crate) use super::super::aarch64::detect_features; diff --git a/library/std_detect/src/detect/os/freebsd/arm.rs b/library/std_detect/src/detect/os/freebsd/arm.rs new file mode 100644 index 00000000000..0a15156e1bd --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/arm.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection for ARM on FreeBSD + +use super::auxvec; +use crate::detect::{Feature, cache}; + +// Defined in machine/elf.h. 
+// https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/arm/include/elf.h +const HWCAP_NEON: usize = 0x00001000; +const HWCAP2_AES: usize = 0x00000001; +const HWCAP2_PMULL: usize = 0x00000002; +const HWCAP2_SHA1: usize = 0x00000004; +const HWCAP2_SHA2: usize = 0x00000008; +const HWCAP2_CRC32: usize = 0x00000010; + +/// Try to read the features from the auxiliary vector +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, auxv.hwcap & HWCAP_NEON != 0); + enable_feature(&mut value, Feature::pmull, auxv.hwcap2 & HWCAP2_PMULL != 0); + enable_feature(&mut value, Feature::crc, auxv.hwcap2 & HWCAP2_CRC32 != 0); + enable_feature(&mut value, Feature::aes, auxv.hwcap2 & HWCAP2_AES != 0); + // SHA2 requires SHA1 & SHA2 features + let sha1 = auxv.hwcap2 & HWCAP2_SHA1 != 0; + let sha2 = auxv.hwcap2 & HWCAP2_SHA2 != 0; + enable_feature(&mut value, Feature::sha2, sha1 && sha2); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/freebsd/auxvec.rs b/library/std_detect/src/detect/os/freebsd/auxvec.rs new file mode 100644 index 00000000000..4e72bf22d76 --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/auxvec.rs @@ -0,0 +1,66 @@ +//! Parses ELF auxiliary vectors. +#![cfg_attr( + any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "riscv64" + ), + allow(dead_code) +)] + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. 
+#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For FreeBSD, they are +/// defined in [sys/elf_common.h][elf_common_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// [elf_common.h]: https://svnweb.freebsd.org/base/release/12.0.0/sys/sys/elf_common.h?revision=341707 +pub(crate) fn auxv() -> Result<AuxVec, ()> { + let hwcap = archauxv(libc::AT_HWCAP); + let hwcap2 = archauxv(libc::AT_HWCAP2); + // Zero could indicate that no features were detected, but it's also used to + // indicate an error. In particular, on many platforms AT_HWCAP2 will be + // legitimately zero, since it contains the most recent feature flags. + if hwcap != 0 || hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + Err(()) +} + +/// Tries to read the `key` from the auxiliary vector. +fn archauxv(key: libc::c_int) -> usize { + const OUT_LEN: libc::c_int = core::mem::size_of::<libc::c_ulong>() as libc::c_int; + let mut out: libc::c_ulong = 0; + unsafe { + // elf_aux_info is available on FreeBSD 12.0+ and 11.4+: + // https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 + // https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h + // FreeBSD 11 support in std has been removed in Rust 1.75 (https://github.com/rust-lang/rust/pull/114521), + // so we can safely use this function. + let res = libc::elf_aux_info( + key, + &mut out as *mut libc::c_ulong as *mut libc::c_void, + OUT_LEN, + ); + // If elf_aux_info fails, `out` will be left at zero (which is the proper default value). 
+ debug_assert!(res == 0 || out == 0); + } + out as usize +} diff --git a/library/std_detect/src/detect/os/freebsd/mod.rs b/library/std_detect/src/detect/os/freebsd/mod.rs new file mode 100644 index 00000000000..ade7fb6269d --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/mod.rs @@ -0,0 +1,22 @@ +//! Run-time feature detection on FreeBSD + +mod auxvec; + +cfg_if::cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub(crate) use self::aarch64::detect_features; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub(crate) use self::arm::detect_features; + } else if #[cfg(target_arch = "powerpc64")] { + mod powerpc; + pub(crate) use self::powerpc::detect_features; + } else { + use crate::detect::cache; + /// Performs run-time feature detection. + pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() + } + } +} diff --git a/library/std_detect/src/detect/os/freebsd/powerpc.rs b/library/std_detect/src/detect/os/freebsd/powerpc.rs new file mode 100644 index 00000000000..d03af68cd08 --- /dev/null +++ b/library/std_detect/src/detect/os/freebsd/powerpc.rs @@ -0,0 +1,21 @@ +//! Run-time feature detection for PowerPC on FreeBSD. 
+ +use super::auxvec; +use crate::detect::{Feature, cache}; + +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); + return value; + } + value +} diff --git a/library/std_detect/src/detect/os/linux/aarch64.rs b/library/std_detect/src/detect/os/linux/aarch64.rs new file mode 100644 index 00000000000..22a9cefff7b --- /dev/null +++ b/library/std_detect/src/detect/os/linux/aarch64.rs @@ -0,0 +1,484 @@ +//! Run-time feature detection for Aarch64 on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. +pub(crate) fn detect_features() -> cache::Initializer { + #[cfg(target_os = "android")] + let is_exynos9810 = { + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // https://reviews.llvm.org/D114523 + let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize]; + let len = unsafe { + libc::__system_property_get(c"ro.arch".as_ptr(), arch.as_mut_ptr() as *mut libc::c_char) + }; + // On Exynos, ro.arch is not available on Android 12+, but it is fine + // because Android 9+ includes the fix. 
+ len > 0 && arch.starts_with(b"exynos9810") + }; + #[cfg(not(target_os = "android"))] + let is_exynos9810 = false; + + if let Ok(auxv) = auxvec::auxv() { + let hwcap: AtHwcap = auxv.into(); + return hwcap.cache(is_exynos9810); + } + cache::Initializer::default() +} + +/// These values are part of the platform-specific [asm/hwcap.h][hwcap] . +/// +/// The names match those used for cpuinfo. +/// +/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +#[derive(Debug, Default, PartialEq)] +struct AtHwcap { + // AT_HWCAP + fp: bool, + asimd: bool, + // evtstrm: No LLVM support. + aes: bool, + pmull: bool, + sha1: bool, + sha2: bool, + crc32: bool, + atomics: bool, + fphp: bool, + asimdhp: bool, + // cpuid: No LLVM support. + asimdrdm: bool, + jscvt: bool, + fcma: bool, + lrcpc: bool, + dcpop: bool, + sha3: bool, + sm3: bool, + sm4: bool, + asimddp: bool, + sha512: bool, + sve: bool, + fhm: bool, + dit: bool, + uscat: bool, + ilrcpc: bool, + flagm: bool, + ssbs: bool, + sb: bool, + paca: bool, + pacg: bool, + + // AT_HWCAP2 + dcpodp: bool, + sve2: bool, + sveaes: bool, + svepmull: bool, + svebitperm: bool, + svesha3: bool, + svesm4: bool, + flagm2: bool, + frint: bool, + // svei8mm: See i8mm feature. + svef32mm: bool, + svef64mm: bool, + // svebf16: See bf16 feature. + i8mm: bool, + bf16: bool, + // dgh: No LLVM support. 
+ rng: bool, + bti: bool, + mte: bool, + ecv: bool, + // afp: bool, + // rpres: bool, + // mte3: bool, + sme: bool, + smei16i64: bool, + smef64f64: bool, + // smei8i32: bool, + // smef16f32: bool, + // smeb16f32: bool, + // smef32f32: bool, + smefa64: bool, + wfxt: bool, + // ebf16: bool, + // sveebf16: bool, + cssc: bool, + // rprfm: bool, + sve2p1: bool, + sme2: bool, + sme2p1: bool, + // smei16i32: bool, + // smebi32i32: bool, + smeb16b16: bool, + smef16f16: bool, + mops: bool, + hbc: bool, + sveb16b16: bool, + lrcpc3: bool, + lse128: bool, + fpmr: bool, + lut: bool, + faminmax: bool, + f8cvt: bool, + f8fma: bool, + f8dp4: bool, + f8dp2: bool, + f8e4m3: bool, + f8e5m2: bool, + smelutv2: bool, + smef8f16: bool, + smef8f32: bool, + smesf8fma: bool, + smesf8dp4: bool, + smesf8dp2: bool, + // pauthlr: bool, +} + +impl From<auxvec::AuxVec> for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. + fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + fp: bit::test(auxv.hwcap, 0), + asimd: bit::test(auxv.hwcap, 1), + // evtstrm: bit::test(auxv.hwcap, 2), + aes: bit::test(auxv.hwcap, 3), + pmull: bit::test(auxv.hwcap, 4), + sha1: bit::test(auxv.hwcap, 5), + sha2: bit::test(auxv.hwcap, 6), + crc32: bit::test(auxv.hwcap, 7), + atomics: bit::test(auxv.hwcap, 8), + fphp: bit::test(auxv.hwcap, 9), + asimdhp: bit::test(auxv.hwcap, 10), + // cpuid: bit::test(auxv.hwcap, 11), + asimdrdm: bit::test(auxv.hwcap, 12), + jscvt: bit::test(auxv.hwcap, 13), + fcma: bit::test(auxv.hwcap, 14), + lrcpc: bit::test(auxv.hwcap, 15), + dcpop: bit::test(auxv.hwcap, 16), + sha3: bit::test(auxv.hwcap, 17), + sm3: bit::test(auxv.hwcap, 18), + sm4: bit::test(auxv.hwcap, 19), + asimddp: bit::test(auxv.hwcap, 20), + sha512: bit::test(auxv.hwcap, 21), + sve: bit::test(auxv.hwcap, 22), + fhm: bit::test(auxv.hwcap, 23), + dit: bit::test(auxv.hwcap, 24), + uscat: bit::test(auxv.hwcap, 25), + ilrcpc: bit::test(auxv.hwcap, 26), + flagm: bit::test(auxv.hwcap, 27), + ssbs: bit::test(auxv.hwcap, 28), + 
sb: bit::test(auxv.hwcap, 29), + paca: bit::test(auxv.hwcap, 30), + pacg: bit::test(auxv.hwcap, 31), + + // AT_HWCAP2 + dcpodp: bit::test(auxv.hwcap2, 0), + sve2: bit::test(auxv.hwcap2, 1), + sveaes: bit::test(auxv.hwcap2, 2), + svepmull: bit::test(auxv.hwcap2, 3), + svebitperm: bit::test(auxv.hwcap2, 4), + svesha3: bit::test(auxv.hwcap2, 5), + svesm4: bit::test(auxv.hwcap2, 6), + flagm2: bit::test(auxv.hwcap2, 7), + frint: bit::test(auxv.hwcap2, 8), + // svei8mm: bit::test(auxv.hwcap2, 9), + svef32mm: bit::test(auxv.hwcap2, 10), + svef64mm: bit::test(auxv.hwcap2, 11), + // svebf16: bit::test(auxv.hwcap2, 12), + i8mm: bit::test(auxv.hwcap2, 13), + bf16: bit::test(auxv.hwcap2, 14), + // dgh: bit::test(auxv.hwcap2, 15), + rng: bit::test(auxv.hwcap2, 16), + bti: bit::test(auxv.hwcap2, 17), + mte: bit::test(auxv.hwcap2, 18), + ecv: bit::test(auxv.hwcap2, 19), + // afp: bit::test(auxv.hwcap2, 20), + // rpres: bit::test(auxv.hwcap2, 21), + // mte3: bit::test(auxv.hwcap2, 22), + sme: bit::test(auxv.hwcap2, 23), + smei16i64: bit::test(auxv.hwcap2, 24), + smef64f64: bit::test(auxv.hwcap2, 25), + // smei8i32: bit::test(auxv.hwcap2, 26), + // smef16f32: bit::test(auxv.hwcap2, 27), + // smeb16f32: bit::test(auxv.hwcap2, 28), + // smef32f32: bit::test(auxv.hwcap2, 29), + smefa64: bit::test(auxv.hwcap2, 30), + wfxt: bit::test(auxv.hwcap2, 31), + // ebf16: bit::test(auxv.hwcap2, 32), + // sveebf16: bit::test(auxv.hwcap2, 33), + cssc: bit::test(auxv.hwcap2, 34), + // rprfm: bit::test(auxv.hwcap2, 35), + sve2p1: bit::test(auxv.hwcap2, 36), + sme2: bit::test(auxv.hwcap2, 37), + sme2p1: bit::test(auxv.hwcap2, 38), + // smei16i32: bit::test(auxv.hwcap2, 39), + // smebi32i32: bit::test(auxv.hwcap2, 40), + smeb16b16: bit::test(auxv.hwcap2, 41), + smef16f16: bit::test(auxv.hwcap2, 42), + mops: bit::test(auxv.hwcap2, 43), + hbc: bit::test(auxv.hwcap2, 44), + sveb16b16: bit::test(auxv.hwcap2, 45), + lrcpc3: bit::test(auxv.hwcap2, 46), + lse128: bit::test(auxv.hwcap2, 47), + fpmr: 
bit::test(auxv.hwcap2, 48), + lut: bit::test(auxv.hwcap2, 49), + faminmax: bit::test(auxv.hwcap2, 50), + f8cvt: bit::test(auxv.hwcap2, 51), + f8fma: bit::test(auxv.hwcap2, 52), + f8dp4: bit::test(auxv.hwcap2, 53), + f8dp2: bit::test(auxv.hwcap2, 54), + f8e4m3: bit::test(auxv.hwcap2, 55), + f8e5m2: bit::test(auxv.hwcap2, 56), + smelutv2: bit::test(auxv.hwcap2, 57), + smef8f16: bit::test(auxv.hwcap2, 58), + smef8f32: bit::test(auxv.hwcap2, 59), + smesf8fma: bit::test(auxv.hwcap2, 60), + smesf8dp4: bit::test(auxv.hwcap2, 61), + smesf8dp2: bit::test(auxv.hwcap2, 62), + // pauthlr: bit::test(auxv.hwcap2, ??), + } + } +} + +impl AtHwcap { + /// Initializes the cache from the feature -bits. + /// + /// The feature dependencies here come directly from LLVM's feature definitions: + /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td + fn cache(self, is_exynos9810: bool) -> cache::Initializer { + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // So, only check features that are known to be available on exynos-m3: + // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature + // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342. 
+ if is_exynos9810 { + enable_feature(Feature::fp, self.fp); + enable_feature(Feature::crc, self.crc32); + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // Cryptographic extensions require ASIMD + // AES also covers FEAT_PMULL + enable_feature(Feature::aes, self.aes && self.pmull && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + return value; + } + + enable_feature(Feature::fp, self.fp); + // Half-float support requires float support + enable_feature(Feature::fp16, self.fp && self.fphp); + // FHM (fp16fml in LLVM) requires half float support + enable_feature(Feature::fhm, self.fphp && self.fhm); + enable_feature(Feature::pmull, self.pmull); + enable_feature(Feature::crc, self.crc32); + enable_feature(Feature::lse, self.atomics); + enable_feature(Feature::lse2, self.uscat); + enable_feature(Feature::lse128, self.lse128 && self.atomics); + enable_feature(Feature::rcpc, self.lrcpc); + // RCPC2 (rcpc-immo in LLVM) requires RCPC support + let rcpc2 = self.ilrcpc && self.lrcpc; + enable_feature(Feature::rcpc2, rcpc2); + enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2); + enable_feature(Feature::dit, self.dit); + enable_feature(Feature::flagm, self.flagm); + enable_feature(Feature::flagm2, self.flagm2); + enable_feature(Feature::ssbs, self.ssbs); + enable_feature(Feature::sb, self.sb); + enable_feature(Feature::paca, self.paca); + enable_feature(Feature::pacg, self.pacg); + // enable_feature(Feature::pauth_lr, self.pauthlr); + enable_feature(Feature::dpb, self.dcpop); + enable_feature(Feature::dpb2, self.dcpodp); + enable_feature(Feature::rand, self.rng); + enable_feature(Feature::bti, self.bti); + enable_feature(Feature::mte, self.mte); + // jsconv requires float support + enable_feature(Feature::jsconv, self.jscvt && self.fp); + enable_feature(Feature::rdm, 
self.asimdrdm); + enable_feature(Feature::dotprod, self.asimddp); + enable_feature(Feature::frintts, self.frint); + + // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes + // separately. We ignore that distinction here. + enable_feature(Feature::i8mm, self.i8mm); + enable_feature(Feature::bf16, self.bf16); + + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // ASIMD extensions require ASIMD support: + enable_feature(Feature::fcma, self.fcma && asimd); + enable_feature(Feature::sve, self.sve && asimd); + + // SVE extensions require SVE & ASIMD + enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd); + enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd); + + // Cryptographic extensions require ASIMD + enable_feature(Feature::aes, self.aes && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + // SHA512/SHA3 require SHA1 & SHA256 + enable_feature( + Feature::sha3, + self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd, + ); + enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd); + + // SVE2 requires SVE + let sve2 = self.sve2 && self.sve && asimd; + enable_feature(Feature::sve2, sve2); + enable_feature(Feature::sve2p1, self.sve2p1 && sve2); + // SVE2 extensions require SVE2 and crypto features + enable_feature( + Feature::sve2_aes, + self.sveaes && self.svepmull && sve2 && self.aes, + ); + enable_feature( + Feature::sve2_sm4, + self.svesm4 && sve2 && self.sm3 && self.sm4, + ); + enable_feature( + Feature::sve2_sha3, + self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2, + ); + enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2); + enable_feature(Feature::sve_b16b16, self.bf16 && self.sveb16b16); + enable_feature(Feature::hbc, self.hbc); + 
enable_feature(Feature::mops, self.mops); + enable_feature(Feature::ecv, self.ecv); + enable_feature(Feature::lut, self.lut); + enable_feature(Feature::cssc, self.cssc); + enable_feature(Feature::fpmr, self.fpmr); + enable_feature(Feature::faminmax, self.faminmax); + let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16; + enable_feature(Feature::fp8, fp8); + let fp8fma = self.f8fma && fp8; + enable_feature(Feature::fp8fma, fp8fma); + let fp8dot4 = self.f8dp4 && fp8fma; + enable_feature(Feature::fp8dot4, fp8dot4); + enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4); + enable_feature(Feature::wfxt, self.wfxt); + let sme = self.sme && self.bf16; + enable_feature(Feature::sme, sme); + enable_feature(Feature::sme_i16i64, self.smei16i64 && sme); + enable_feature(Feature::sme_f64f64, self.smef64f64 && sme); + enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2); + let sme2 = self.sme2 && sme; + enable_feature(Feature::sme2, sme2); + enable_feature(Feature::sme2p1, self.sme2p1 && sme2); + enable_feature( + Feature::sme_b16b16, + sme2 && self.bf16 && self.sveb16b16 && self.smeb16b16, + ); + enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2); + enable_feature(Feature::sme_lutv2, self.smelutv2); + let sme_f8f32 = self.smef8f32 && sme2 && fp8; + enable_feature(Feature::sme_f8f32, sme_f8f32); + enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32); + let ssve_fp8fma = self.smesf8fma && sme2 && fp8; + enable_feature(Feature::ssve_fp8fma, ssve_fp8fma); + let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma; + enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4); + enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4); + } + value + } +} + +#[cfg(target_endian = "little")] +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(feature = "std_detect_file_io")] + mod auxv_from_file { + use super::auxvec::auxv_from_file; + use super::*; + // The baseline hwcaps used in the (artificial) auxv test files. 
+ fn baseline_hwcaps() -> AtHwcap { + AtHwcap { + fp: true, + asimd: true, + aes: true, + pmull: true, + sha1: true, + sha2: true, + crc32: true, + atomics: true, + fphp: true, + asimdhp: true, + asimdrdm: true, + lrcpc: true, + dcpop: true, + asimddp: true, + ssbs: true, + ..AtHwcap::default() + } + } + + #[test] + fn linux_empty_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_no_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!(AtHwcap::from(v), baseline_hwcaps()); + } + #[test] + fn linux_hwcap2_aarch64() { + let file = concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/detect/test_data/linux-hwcap2-aarch64.auxv" + ); + println!("file: {file}"); + let v = auxv_from_file(file).unwrap(); + println!("HWCAP : 0x{:0x}", v.hwcap); + println!("HWCAP2: 0x{:0x}", v.hwcap2); + assert_eq!( + AtHwcap::from(v), + AtHwcap { + // Some other HWCAP bits. + paca: true, + pacg: true, + // HWCAP2-only bits. + dcpodp: true, + frint: true, + rng: true, + bti: true, + mte: true, + ..baseline_hwcaps() + } + ); + } + } +} diff --git a/library/std_detect/src/detect/os/linux/arm.rs b/library/std_detect/src/detect/os/linux/arm.rs new file mode 100644 index 00000000000..bbb173227d0 --- /dev/null +++ b/library/std_detect/src/detect/os/linux/arm.rs @@ -0,0 +1,34 @@ +//! Run-time feature detection for ARM on Linux. + +use super::auxvec; +use crate::detect::{Feature, bit, cache}; + +/// Try to read the features from the auxiliary vector. 
+pub(crate) fn detect_features() -> cache::Initializer {
+    let mut features = cache::Initializer::default();
+
+    // Without a readable auxiliary vector nothing can be detected, so the
+    // default (empty) feature set is returned unchanged.
+    let Ok(auxv) = auxvec::auxv() else {
+        return features;
+    };
+
+    // Records `feature` in the result when its capability bit is present.
+    let mut set_if = |feature: Feature, present: bool| {
+        if present {
+            features.set(feature as u32);
+        }
+    };
+
+    // The values are part of the platform-specific [asm/hwcap.h][hwcap]
+    //
+    // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h
+    let (hwcap, hwcap2) = (auxv.hwcap, auxv.hwcap2);
+    set_if(Feature::i8mm, bit::test(hwcap, 27));
+    set_if(Feature::dotprod, bit::test(hwcap, 24));
+    set_if(Feature::neon, bit::test(hwcap, 12));
+    set_if(Feature::pmull, bit::test(hwcap2, 1));
+    set_if(Feature::crc, bit::test(hwcap2, 4));
+    set_if(Feature::aes, bit::test(hwcap2, 0));
+    // SHA2 requires SHA1 & SHA2 features
+    set_if(Feature::sha2, bit::test(hwcap2, 2) && bit::test(hwcap2, 3));
+
+    features
+}
diff --git a/library/std_detect/src/detect/os/linux/auxvec.rs b/library/std_detect/src/detect/os/linux/auxvec.rs
new file mode 100644
index 00000000000..54f52aa7533
--- /dev/null
+++ b/library/std_detect/src/detect/os/linux/auxvec.rs
@@ -0,0 +1,341 @@
+//! Parses ELF auxiliary vectors.
+#![allow(dead_code)]
+
+pub(crate) const AT_NULL: usize = 0;
+
+/// Key to access the CPU Hardware capabilities bitfield.
+pub(crate) const AT_HWCAP: usize = 16;
+/// Key to access the CPU Hardware capabilities 2 bitfield.
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "powerpc",
+    target_arch = "powerpc64",
+    target_arch = "s390x",
+))]
+pub(crate) const AT_HWCAP2: usize = 26;
+
+/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
+///
+/// If an entry cannot be read all the bits in the bitfield are set to zero.
+/// This should be interpreted as all the features being disabled.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(test, derive(PartialEq))]
+pub(crate) struct AuxVec {
+    pub hwcap: usize,
+    #[cfg(any(
+        target_arch = "aarch64",
+        target_arch = "arm",
+        target_arch = "powerpc",
+        target_arch = "powerpc64",
+        target_arch = "s390x",
+    ))]
+    pub hwcap2: usize,
+}
+
+/// ELF Auxiliary Vector
+///
+/// The auxiliary vector is a memory region in a running ELF program's stack
+/// composed of (key: usize, value: usize) pairs.
+///
+/// The keys used in the aux vector are platform dependent. For Linux, they are
+/// defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given
+/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys.
+///
+/// There is no perfect way of reading the auxiliary vector.
+///
+/// - If the `std_detect_dlsym_getauxval` cargo feature is enabled, this will use
+///   `getauxval` if it's linked to the binary, and otherwise proceed to a fallback implementation.
+///   When `std_detect_dlsym_getauxval` is disabled, this will assume that `getauxval` is
+///   linked to the binary - if that is not the case the behavior is undefined.
+/// - Otherwise, if the `std_detect_file_io` cargo feature is enabled, it will
+///   try to read `/proc/self/auxv`.
+/// - If that fails, this function returns an error.
+///
+/// Note that run-time feature detection is not invoked for features that can
+/// be detected at compile-time.
+///
+/// Note: The `std_detect_dlsym_getauxval` cargo feature is ignored on
+/// `*-linux-{gnu,musl,ohos}*` and `*-android*` targets because we can safely assume `getauxval`
+/// is linked to the binary.
+/// - `*-linux-gnu*` targets ([since Rust 1.64](https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html))
+///   have glibc requirements higher than [glibc 2.16 that added `getauxval`](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html).
+/// - `*-linux-musl*` targets ([at least since Rust 1.15](https://github.com/rust-lang/rust/blob/1.15.0/src/ci/docker/x86_64-musl/build-musl.sh#L15))
+///   use musl newer than [musl 1.1.0 that added `getauxval`](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197)
+/// - `*-linux-ohos*` targets use a [fork of musl 1.2](https://gitee.com/openharmony/docs/blob/master/en/application-dev/reference/native-lib/musl.md)
+/// - `*-android*` targets ([since Rust 1.68](https://blog.rust-lang.org/2023/01/09/android-ndk-update-r25.html))
+///   have the minimum supported API level higher than [Android 4.3 (API level 18) that added `getauxval`](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49).
+///
+/// For more information about when `getauxval` is available check the great
+/// [`auxv` crate documentation][auxv_docs].
+///
+/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
+/// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/
+pub(crate) fn auxv() -> Result<AuxVec, ()> {
+    // Try to call a getauxval function.
+    if let Ok(hwcap) = getauxval(AT_HWCAP) {
+        // Targets with only AT_HWCAP:
+        #[cfg(any(
+            target_arch = "riscv32",
+            target_arch = "riscv64",
+            target_arch = "mips",
+            target_arch = "mips64",
+            target_arch = "loongarch32",
+            target_arch = "loongarch64",
+        ))]
+        {
+            // Zero could indicate that no features were detected, but it's also used to indicate
+            // an error. In either case, try the fallback.
+            if hwcap != 0 {
+                return Ok(AuxVec { hwcap });
+            }
+        }
+
+        // Targets with AT_HWCAP and AT_HWCAP2:
+        #[cfg(any(
+            target_arch = "aarch64",
+            target_arch = "arm",
+            target_arch = "powerpc",
+            target_arch = "powerpc64",
+            target_arch = "s390x",
+        ))]
+        {
+            if let Ok(hwcap2) = getauxval(AT_HWCAP2) {
+                // Zero could indicate that no features were detected, but it's also used to indicate
+                // an error. In particular, on many platforms AT_HWCAP2 will be legitimately zero,
+                // since it contains the most recent feature flags. Use the fallback only if no
+                // features were detected at all.
+                if hwcap != 0 || hwcap2 != 0 {
+                    return Ok(AuxVec { hwcap, hwcap2 });
+                }
+            }
+        }
+
+        // Intentionally not used
+        let _ = hwcap;
+    }
+
+    #[cfg(feature = "std_detect_file_io")]
+    {
+        // If calling getauxval fails, try to read the auxiliary vector from
+        // its file:
+        auxv_from_file("/proc/self/auxv")
+    }
+    #[cfg(not(feature = "std_detect_file_io"))]
+    {
+        Err(())
+    }
+}
+
+/// Tries to read the `key` from the auxiliary vector by calling the
+/// `getauxval` function. If the function is not linked, this function returns `Err`.
+fn getauxval(key: usize) -> Result<usize, ()> {
+    type F = unsafe extern "C" fn(libc::c_ulong) -> libc::c_ulong;
+    cfg_if::cfg_if! {
+        if #[cfg(all(
+            feature = "std_detect_dlsym_getauxval",
+            not(all(
+                target_os = "linux",
+                any(target_env = "gnu", target_env = "musl", target_env = "ohos"),
+            )),
+            not(target_os = "android"),
+        ))] {
+            // SAFETY: the pointer is only transmuted after the null check; it is
+            // assumed that a symbol named `getauxval` has the C signature `F`.
+            let ffi_getauxval: F = unsafe {
+                let ptr = libc::dlsym(libc::RTLD_DEFAULT, c"getauxval".as_ptr());
+                if ptr.is_null() {
+                    return Err(());
+                }
+                core::mem::transmute(ptr)
+            };
+        } else {
+            let ffi_getauxval: F = libc::getauxval;
+        }
+    }
+    Ok(unsafe { ffi_getauxval(key as libc::c_ulong) as usize })
+}
+
+/// Tries to read the auxiliary vector from the `file`. If this fails, this
+/// function returns `Err`.
+#[cfg(feature = "std_detect_file_io")]
+pub(super) fn auxv_from_file(file: &str) -> Result<AuxVec, ()> {
+    let file = super::read_file(file)?;
+
+    // See <https://github.com/torvalds/linux/blob/v5.15/include/uapi/linux/auxvec.h>.
+    //
+    // The auxiliary vector contains at most 34 (key,value) fields: from
+    // `AT_MINSIGSTKSZ` to `AT_NULL`, but its number may increase.
+    let len = file.len();
+    // One extra word so that a byte length that is not a multiple of the word
+    // size still fits; the unused tail stays zero-initialized.
+    let mut buf = alloc::vec![0_usize; 1 + len / core::mem::size_of::<usize>()];
+    // SAFETY: `file` holds exactly `len` readable bytes, `buf` owns
+    // `(1 + len / size_of::<usize>()) * size_of::<usize>() > len` writable bytes,
+    // and the two are distinct allocations.
+    unsafe {
+        core::ptr::copy_nonoverlapping(file.as_ptr(), buf.as_mut_ptr() as *mut u8, len);
+    }
+
+    auxv_from_buf(&buf)
+}
+
+/// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this
+/// function returns `Err`.
+///
+/// The buffer is read as consecutive (key, value) word pairs up to the first
+/// `AT_NULL` key. A trailing word without a partner is ignored: the buffer built
+/// by `auxv_from_file` always carries one spare word, so a lone final word is
+/// expected and must never be indexed as a pair.
+#[cfg(feature = "std_detect_file_io")]
+fn auxv_from_buf(buf: &[usize]) -> Result<AuxVec, ()> {
+    // Targets with only AT_HWCAP:
+    #[cfg(any(
+        target_arch = "riscv32",
+        target_arch = "riscv64",
+        target_arch = "mips",
+        target_arch = "mips64",
+        target_arch = "loongarch32",
+        target_arch = "loongarch64",
+    ))]
+    {
+        // `chunks_exact` guarantees two words per entry, so `el[1]` cannot panic.
+        for el in buf.chunks_exact(2) {
+            match el[0] {
+                AT_NULL => break,
+                AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }),
+                _ => (),
+            }
+        }
+    }
+    // Targets with AT_HWCAP and AT_HWCAP2:
+    #[cfg(any(
+        target_arch = "aarch64",
+        target_arch = "arm",
+        target_arch = "powerpc",
+        target_arch = "powerpc64",
+        target_arch = "s390x",
+    ))]
+    {
+        let mut hwcap = None;
+        // For some platforms, AT_HWCAP2 was added recently, so let it default to zero.
+        let mut hwcap2 = 0;
+        // `chunks_exact` guarantees two words per entry, so `el[1]` cannot panic.
+        for el in buf.chunks_exact(2) {
+            match el[0] {
+                AT_NULL => break,
+                AT_HWCAP => hwcap = Some(el[1]),
+                AT_HWCAP2 => hwcap2 = el[1],
+                _ => (),
+            }
+        }
+
+        if let Some(hwcap) = hwcap {
+            return Ok(AuxVec { hwcap, hwcap2 });
+        }
+    }
+    // Suppress unused variable
+    let _ = buf;
+    Err(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // FIXME: on mips/mips64 getauxval returns 0, and /proc/self/auxv
+    // does not always contain the AT_HWCAP key under qemu.
+    #[cfg(any(
+        target_arch = "arm",
+        target_arch = "powerpc",
+        target_arch = "powerpc64",
+        target_arch = "s390x",
+    ))]
+    #[test]
+    fn auxv_crate() {
+        let v = auxv();
+        if let Ok(hwcap) = getauxval(AT_HWCAP) {
+            let rt_hwcap = v.expect("failed to find hwcap key").hwcap;
+            assert_eq!(rt_hwcap, hwcap);
+        }
+
+        // Targets with AT_HWCAP and AT_HWCAP2:
+        #[cfg(any(
+            target_arch = "aarch64",
+            target_arch = "arm",
+            target_arch = "powerpc",
+            target_arch = "powerpc64",
+            target_arch = "s390x",
+        ))]
+        {
+            if let Ok(hwcap2) = getauxval(AT_HWCAP2) {
+                let rt_hwcap2 = v.expect("failed to find hwcap2 key").hwcap2;
+                assert_eq!(rt_hwcap2, hwcap2);
+            }
+        }
+    }
+
+    #[test]
+    fn auxv_dump() {
+        if let Ok(auxvec) = auxv() {
+            println!("{:?}", auxvec);
+        } else {
+            println!("both getauxval() and reading /proc/self/auxv failed!");
+        }
+    }
+
+    #[cfg(feature = "std_detect_file_io")]
+    cfg_if::cfg_if! {
+        if #[cfg(target_arch = "arm")] {
+            #[test]
+            fn linux_rpi3() {
+                let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-rpi3.auxv");
+                println!("file: {file}");
+                let v = auxv_from_file(file).unwrap();
+                assert_eq!(v.hwcap, 4174038);
+                assert_eq!(v.hwcap2, 16);
+            }
+
+            #[test]
+            fn linux_macos_vb() {
+                let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv");
+                println!("file: {file}");
+                // The file contains HWCAP but not HWCAP2. In that case, we treat HWCAP2 as zero.
+                let v = auxv_from_file(file).unwrap();
+                assert_eq!(v.hwcap, 126614527);
+                assert_eq!(v.hwcap2, 0);
+            }
+        } else if #[cfg(target_arch = "aarch64")] {
+            #[cfg(target_endian = "little")]
+            #[test]
+            fn linux_artificial_aarch64() {
+                let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-artificial-aarch64.auxv");
+                println!("file: {file}");
+                let v = auxv_from_file(file).unwrap();
+                assert_eq!(v.hwcap, 0x0123456789abcdef);
+                assert_eq!(v.hwcap2, 0x02468ace13579bdf);
+            }
+            #[cfg(target_endian = "little")]
+            #[test]
+            fn linux_no_hwcap2_aarch64() {
+                let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv");
+                println!("file: {file}");
+                let v = auxv_from_file(file).unwrap();
+                // An absent HWCAP2 is treated as zero, and does not prevent acceptance of HWCAP.
+                assert_ne!(v.hwcap, 0);
+                assert_eq!(v.hwcap2, 0);
+            }
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "std_detect_file_io")]
+    fn auxv_dump_procfs() {
+        if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") {
+            println!("{:?}", auxvec);
+        } else {
+            println!("reading /proc/self/auxv failed!");
+        }
+    }
+
+    #[cfg(any(
+        target_arch = "aarch64",
+        target_arch = "arm",
+        target_arch = "powerpc",
+        target_arch = "powerpc64",
+        target_arch = "s390x",
+    ))]
+    #[test]
+    #[cfg(feature = "std_detect_file_io")]
+    fn auxv_crate_procfs() {
+        if let Ok(procfs_auxv) = auxv_from_file("/proc/self/auxv") {
+            assert_eq!(auxv().unwrap(), procfs_auxv);
+        }
+    }
+}
diff --git a/library/std_detect/src/detect/os/linux/loongarch.rs b/library/std_detect/src/detect/os/linux/loongarch.rs
new file mode 100644
index 00000000000..14cc7a73183
--- /dev/null
+++ b/library/std_detect/src/detect/os/linux/loongarch.rs
@@ -0,0 +1,68 @@
+//! Run-time feature detection for LoongArch on Linux.
+
+use super::auxvec;
+use crate::detect::{Feature, bit, cache};
+use core::arch::asm;
+
+/// Try to read the features from the auxiliary vector.
+pub(crate) fn detect_features() -> cache::Initializer {
+    let mut features = cache::Initializer::default();
+    // Records `feature` in the result when `present` holds.
+    let mut set_if = |feature: Feature, present: bool| {
+        if present {
+            features.set(feature as u32);
+        }
+    };
+
+    // The values are part of the platform-specific [cpucfg]
+    //
+    // [cpucfg]: LoongArch Reference Manual Volume 1: Basic Architecture v1.1
+    let cpucfg2: usize;
+    let cpucfg3: usize;
+    unsafe {
+        // Configuration word 2.
+        asm!(
+            "cpucfg {}, {}",
+            out(reg) cpucfg2, in(reg) 2,
+            options(pure, nomem, preserves_flags, nostack)
+        );
+        // Configuration word 3.
+        asm!(
+            "cpucfg {}, {}",
+            out(reg) cpucfg3, in(reg) 3,
+            options(pure, nomem, preserves_flags, nostack)
+        );
+    }
+
+    // Features decided by the configuration words alone.
+    set_if(Feature::frecipe, bit::test(cpucfg2, 25));
+    set_if(Feature::div32, bit::test(cpucfg2, 26));
+    set_if(Feature::lam_bh, bit::test(cpucfg2, 27));
+    set_if(Feature::lamcas, bit::test(cpucfg2, 28));
+    set_if(Feature::scq, bit::test(cpucfg2, 30));
+    set_if(Feature::ld_seq_sa, bit::test(cpucfg3, 23));
+
+    // The values are part of the platform-specific [asm/hwcap.h][hwcap]
+    //
+    // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/loongarch/include/uapi/asm/hwcap.h
+    //
+    // If the auxiliary vector cannot be read, only the features above are reported.
+    if let Ok(auxv) = auxvec::auxv() {
+        let hwcap = auxv.hwcap;
+
+        // `f` and `d` need both their configuration bit and HWCAP bit 3.
+        set_if(Feature::f, bit::test(cpucfg2, 1) && bit::test(hwcap, 3));
+        set_if(Feature::d, bit::test(cpucfg2, 2) && bit::test(hwcap, 3));
+        set_if(Feature::lsx, bit::test(hwcap, 4));
+        set_if(Feature::lasx, bit::test(hwcap, 5));
+        // `lbt` is reported only when HWCAP bits 10, 11 and 12 are all set.
+        let lbt = bit::test(hwcap, 10) && bit::test(hwcap, 11) && bit::test(hwcap, 12);
+        set_if(Feature::lbt, lbt);
+        set_if(Feature::lvz, bit::test(hwcap, 9));
+        set_if(Feature::ual, bit::test(hwcap, 2));
+    }
+
+    features
+}
diff --git
a/library/std_detect/src/detect/os/linux/mips.rs b/library/std_detect/src/detect/os/linux/mips.rs
new file mode 100644
index 00000000000..0cfa8869887
--- /dev/null
+++ b/library/std_detect/src/detect/os/linux/mips.rs
@@ -0,0 +1,23 @@
+//! Run-time feature detection for MIPS on Linux.
+
+use super::auxvec;
+use crate::detect::{Feature, bit, cache};
+
+/// Try to read the features from the auxiliary vector.
+pub(crate) fn detect_features() -> cache::Initializer {
+    let mut value = cache::Initializer::default();
+    let enable_feature = |value: &mut cache::Initializer, f, enable| {
+        if enable {
+            value.set(f as u32);
+        }
+    };
+
+    // The values are part of the platform-specific [asm/hwcap.h][hwcap]
+    //
+    // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/mips/include/uapi/asm/hwcap.h
+    if let Ok(auxv) = auxvec::auxv() {
+        enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1));
+        return value;
+    }
+    value
+}
diff --git a/library/std_detect/src/detect/os/linux/mod.rs b/library/std_detect/src/detect/os/linux/mod.rs
new file mode 100644
index 00000000000..9accd41717b
--- /dev/null
+++ b/library/std_detect/src/detect/os/linux/mod.rs
@@ -0,0 +1,67 @@
+//! Run-time feature detection on Linux
+//!
+#[cfg(feature = "std_detect_file_io")]
+use alloc::vec::Vec;
+
+mod auxvec;
+
+/// Reads the whole file at `path` into memory using raw `libc` calls.
+///
+/// Returns `Err(())` if the file cannot be opened or if a `read` call fails;
+/// the descriptor is closed on every path after a successful `open`.
+#[cfg(feature = "std_detect_file_io")]
+fn read_file(path: &str) -> Result<Vec<u8>, ()> {
+    // `open` expects a NUL-terminated C string.
+    let mut path = Vec::from(path.as_bytes());
+    path.push(0);
+
+    // SAFETY: `path` is NUL-terminated and outlives the `open` call. Each `read`
+    // writes only into the spare capacity reserved right before it, and `set_len`
+    // advances by exactly the byte count that `read` reported.
+    unsafe {
+        // `O_CLOEXEC` keeps the descriptor from leaking into a child process if
+        // another thread forks and execs while the file is being read.
+        let file = libc::open(
+            path.as_ptr() as *const libc::c_char,
+            libc::O_RDONLY | libc::O_CLOEXEC,
+        );
+        if file == -1 {
+            return Err(());
+        }
+
+        let mut data = Vec::new();
+        loop {
+            data.reserve(4096);
+            let spare = data.spare_capacity_mut();
+            match libc::read(file, spare.as_mut_ptr() as *mut _, spare.len()) {
+                // End of file.
+                0 => break,
+                n if n > 0 => data.set_len(data.len() + n as usize),
+                // Any negative result is a failure; never feed it to `set_len`.
+                _ => {
+                    libc::close(file);
+                    return Err(());
+                }
+            }
+        }
+
+        libc::close(file);
+        Ok(data)
+    }
+}
+
+cfg_if::cfg_if! {
+    if #[cfg(target_arch = "aarch64")] {
+        mod aarch64;
+        pub(crate) use self::aarch64::detect_features;
+    } else if #[cfg(target_arch = "arm")] {
+        mod arm;
+        pub(crate) use self::arm::detect_features;
+    } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] {
+        mod riscv;
+        pub(crate) use self::riscv::detect_features;
+    } else if #[cfg(any(target_arch = "mips", target_arch = "mips64"))] {
+        mod mips;
+        pub(crate) use self::mips::detect_features;
+    } else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] {
+        mod powerpc;
+        pub(crate) use self::powerpc::detect_features;
+    } else if #[cfg(any(target_arch = "loongarch32", target_arch = "loongarch64"))] {
+        mod loongarch;
+        pub(crate) use self::loongarch::detect_features;
+    } else if #[cfg(target_arch = "s390x")] {
+        mod s390x;
+        pub(crate) use self::s390x::detect_features;
+    } else {
+        use crate::detect::cache;
+        /// Performs run-time feature detection.
+        pub(crate) fn detect_features() -> cache::Initializer {
+            cache::Initializer::default()
+        }
+    }
+}
diff --git a/library/std_detect/src/detect/os/linux/powerpc.rs b/library/std_detect/src/detect/os/linux/powerpc.rs
new file mode 100644
index 00000000000..6a4f7e715d9
--- /dev/null
+++ b/library/std_detect/src/detect/os/linux/powerpc.rs
@@ -0,0 +1,35 @@
+//! Run-time feature detection for PowerPC on Linux.
+
+use super::auxvec;
+use crate::detect::{Feature, cache};
+
+/// Try to read the features from the auxiliary vector.
+pub(crate) fn detect_features() -> cache::Initializer {
+    // The values are part of the platform-specific [asm/cputable.h][cputable]
+    //
+    // [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h
+    //
+    // note: the PowerPC values are the mask to do the test (instead of the
+    // index of the bit to test like in ARM and Aarch64)
+    const ALTIVEC_MASK: usize = 0x10000000; // tested against AT_HWCAP
+    const VSX_MASK: usize = 0x00000080; // tested against AT_HWCAP
+    const POWER8_MASK: usize = 0x80000000; // tested against AT_HWCAP2
+    const POWER9_MASK: usize = 0x00800000; // tested against AT_HWCAP2
+
+    let mut features = cache::Initializer::default();
+
+    // If the auxiliary vector cannot be read, no feature is reported.
+    if let Ok(auxv) = auxvec::auxv() {
+        let mut set_if = |feature: Feature, present: bool| {
+            if present {
+                features.set(feature as u32);
+            }
+        };
+
+        set_if(Feature::altivec, auxv.hwcap & ALTIVEC_MASK != 0);
+        set_if(Feature::vsx, auxv.hwcap & VSX_MASK != 0);
+
+        // A single HWCAP2 bit switches on the whole POWER8 feature group.
+        let has_power8 = auxv.hwcap2 & POWER8_MASK != 0;
+        for feature in [
+            Feature::power8,
+            Feature::power8_altivec,
+            Feature::power8_crypto,
+            Feature::power8_vector,
+        ] {
+            set_if(feature, has_power8);
+        }
+
+        // Likewise for the POWER9 feature group.
+        let has_power9 = auxv.hwcap2 & POWER9_MASK != 0;
+        for feature in [
+            Feature::power9,
+            Feature::power9_altivec,
+            Feature::power9_vector,
+        ] {
+            set_if(feature, has_power9);
+        }
+    }
+
+    features
+}
diff --git a/library/std_detect/src/detect/os/linux/riscv.rs b/library/std_detect/src/detect/os/linux/riscv.rs
new file mode 100644
index 00000000000..db20538af95
--- /dev/null
+++ b/library/std_detect/src/detect/os/linux/riscv.rs
@@ -0,0 +1,330 @@
+//! Run-time feature detection for RISC-V on Linux.
+//!
+//! On RISC-V, detection using auxv only supports single-letter extensions.
+//! So, we use riscv_hwprobe that supports multi-letter extensions if available.
+//!
<https://www.kernel.org/doc/html/latest/arch/riscv/hwprobe.html>
+
+use core::ptr;
+
+use super::super::riscv::imply_features;
+use super::auxvec;
+use crate::detect::{Feature, bit, cache};
+
+// See <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/prctl.h?h=v6.15>
+// for runtime status query constants.
+const PR_RISCV_V_GET_CONTROL: libc::c_int = 70;
+const PR_RISCV_V_VSTATE_CTRL_ON: libc::c_int = 2;
+const PR_RISCV_V_VSTATE_CTRL_CUR_MASK: libc::c_int = 3;
+
+// See <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/uapi/asm/hwprobe.h?h=v6.15>
+// for riscv_hwprobe struct and hardware probing constants.
+
+// One (key, value) pair handed to the `riscv_hwprobe` syscall; `repr(C)` keeps
+// the field order and layout fixed for the kernel.
+#[repr(C)]
+struct riscv_hwprobe {
+    key: i64,
+    value: u64,
+}
+
+impl riscv_hwprobe {
+    // key is overwritten to -1 if not supported by riscv_hwprobe syscall.
+    // Returns `Some(value)` for every other key and `None` for -1.
+    pub fn get(&self) -> Option<u64> {
+        (self.key != -1).then_some(self.value)
+    }
+}
+
+// Syscall number passed to `libc::syscall` below.
+#[allow(non_upper_case_globals)]
+const __NR_riscv_hwprobe: libc::c_long = 258;
+
+const RISCV_HWPROBE_KEY_BASE_BEHAVIOR: i64 = 3;
+const RISCV_HWPROBE_BASE_BEHAVIOR_IMA: u64 = 1 << 0;
+
+// Bit masks tested against the value returned for `RISCV_HWPROBE_KEY_IMA_EXT_0`.
+const RISCV_HWPROBE_KEY_IMA_EXT_0: i64 = 4;
+const RISCV_HWPROBE_IMA_FD: u64 = 1 << 0;
+const RISCV_HWPROBE_IMA_C: u64 = 1 << 1;
+const RISCV_HWPROBE_IMA_V: u64 = 1 << 2;
+const RISCV_HWPROBE_EXT_ZBA: u64 = 1 << 3;
+const RISCV_HWPROBE_EXT_ZBB: u64 = 1 << 4;
+const RISCV_HWPROBE_EXT_ZBS: u64 = 1 << 5;
+const RISCV_HWPROBE_EXT_ZICBOZ: u64 = 1 << 6;
+const RISCV_HWPROBE_EXT_ZBC: u64 = 1 << 7;
+const RISCV_HWPROBE_EXT_ZBKB: u64 = 1 << 8;
+const RISCV_HWPROBE_EXT_ZBKC: u64 = 1 << 9;
+const RISCV_HWPROBE_EXT_ZBKX: u64 = 1 << 10;
+const RISCV_HWPROBE_EXT_ZKND: u64 = 1 << 11;
+const RISCV_HWPROBE_EXT_ZKNE: u64 = 1 << 12;
+const RISCV_HWPROBE_EXT_ZKNH: u64 = 1 << 13;
+const RISCV_HWPROBE_EXT_ZKSED: u64 = 1 << 14;
+const RISCV_HWPROBE_EXT_ZKSH: u64 = 1 << 15;
+const RISCV_HWPROBE_EXT_ZKT: u64 = 1 << 16;
+const RISCV_HWPROBE_EXT_ZVBB: u64 = 1 << 17;
+const RISCV_HWPROBE_EXT_ZVBC: u64 = 1 << 18;
+const RISCV_HWPROBE_EXT_ZVKB: u64 = 1 << 19;
+const RISCV_HWPROBE_EXT_ZVKG: u64 = 1 << 20;
+const RISCV_HWPROBE_EXT_ZVKNED: u64 = 1 << 21;
+const RISCV_HWPROBE_EXT_ZVKNHA: u64 = 1 << 22;
+const RISCV_HWPROBE_EXT_ZVKNHB: u64 = 1 << 23;
+const RISCV_HWPROBE_EXT_ZVKSED: u64 = 1 << 24;
+const RISCV_HWPROBE_EXT_ZVKSH: u64 = 1 << 25;
+const RISCV_HWPROBE_EXT_ZVKT: u64 = 1 << 26;
+const RISCV_HWPROBE_EXT_ZFH: u64 = 1 << 27;
+const RISCV_HWPROBE_EXT_ZFHMIN: u64 = 1 << 28;
+const RISCV_HWPROBE_EXT_ZIHINTNTL: u64 = 1 << 29;
+const RISCV_HWPROBE_EXT_ZVFH: u64 = 1 << 30;
+const RISCV_HWPROBE_EXT_ZVFHMIN: u64 = 1 << 31;
+const RISCV_HWPROBE_EXT_ZFA: u64 = 1 << 32;
+const RISCV_HWPROBE_EXT_ZTSO: u64 = 1 << 33;
+const RISCV_HWPROBE_EXT_ZACAS: u64 = 1 << 34;
+const RISCV_HWPROBE_EXT_ZICOND: u64 = 1 << 35;
+const RISCV_HWPROBE_EXT_ZIHINTPAUSE: u64 = 1 << 36;
+const RISCV_HWPROBE_EXT_ZVE32X: u64 = 1 << 37;
+const RISCV_HWPROBE_EXT_ZVE32F: u64 = 1 << 38;
+const RISCV_HWPROBE_EXT_ZVE64X: u64 = 1 << 39;
+const RISCV_HWPROBE_EXT_ZVE64F: u64 = 1 << 40;
+const RISCV_HWPROBE_EXT_ZVE64D: u64 = 1 << 41;
+const RISCV_HWPROBE_EXT_ZIMOP: u64 = 1 << 42;
+const RISCV_HWPROBE_EXT_ZCA: u64 = 1 << 43;
+const RISCV_HWPROBE_EXT_ZCB: u64 = 1 << 44;
+const RISCV_HWPROBE_EXT_ZCD: u64 = 1 << 45;
+const RISCV_HWPROBE_EXT_ZCF: u64 = 1 << 46;
+const RISCV_HWPROBE_EXT_ZCMOP: u64 = 1 << 47;
+const RISCV_HWPROBE_EXT_ZAWRS: u64 = 1 << 48;
+// Excluded because it only reports the existence of `prctl`-based pointer masking control.
+// const RISCV_HWPROBE_EXT_SUPM: u64 = 1 << 49;
+const RISCV_HWPROBE_EXT_ZICNTR: u64 = 1 << 50;
+const RISCV_HWPROBE_EXT_ZIHPM: u64 = 1 << 51;
+const RISCV_HWPROBE_EXT_ZFBFMIN: u64 = 1 << 52;
+const RISCV_HWPROBE_EXT_ZVFBFMIN: u64 = 1 << 53;
+const RISCV_HWPROBE_EXT_ZVFBFWMA: u64 = 1 << 54;
+const RISCV_HWPROBE_EXT_ZICBOM: u64 = 1 << 55;
+const RISCV_HWPROBE_EXT_ZAAMO: u64 = 1 << 56;
+const RISCV_HWPROBE_EXT_ZALRSC: u64 = 1 << 57;
+
+const RISCV_HWPROBE_KEY_CPUPERF_0: i64 = 5;
+const RISCV_HWPROBE_MISALIGNED_FAST: u64 = 3;
+const RISCV_HWPROBE_MISALIGNED_MASK: u64 = 7;
+
+const RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: i64 = 9;
+const RISCV_HWPROBE_MISALIGNED_SCALAR_FAST: u64 = 3;
+
+const RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: i64 = 10;
+const RISCV_HWPROBE_MISALIGNED_VECTOR_FAST: u64 = 3;
+
+// syscall returns an unsupported error if riscv_hwprobe is not supported,
+// so we can safely use this function on older versions of Linux.
+//
+// Fills `out` in place and returns `true` only when the syscall returned 0.
+// The call passes a CPU set size of 0, a null CPU set pointer and flags 0.
+fn _riscv_hwprobe(out: &mut [riscv_hwprobe]) -> bool {
+    // Thin typed wrapper around the raw `libc::syscall` invocation.
+    unsafe fn __riscv_hwprobe(
+        pairs: *mut riscv_hwprobe,
+        pair_count: libc::size_t,
+        cpu_set_size: libc::size_t,
+        cpus: *mut libc::c_ulong,
+        flags: libc::c_uint,
+    ) -> libc::c_long {
+        unsafe {
+            libc::syscall(
+                __NR_riscv_hwprobe,
+                pairs,
+                pair_count,
+                cpu_set_size,
+                cpus,
+                flags,
+            )
+        }
+    }
+
+    unsafe { __riscv_hwprobe(out.as_mut_ptr(), out.len(), 0, ptr::null_mut(), 0) == 0 }
+}
+
+/// Read list of supported features from (1) the auxiliary vector
+/// and (2) the results of `riscv_hwprobe` and `prctl` system calls.
+pub(crate) fn detect_features() -> cache::Initializer {
+    let mut value = cache::Initializer::default();
+    let mut enable_feature = |feature, enable| {
+        if enable {
+            value.set(feature as u32);
+        }
+    };
+
+    // Use auxiliary vector to enable single-letter ISA extensions.
+    // The values are part of the platform-specific [asm/hwcap.h][hwcap]
+    //
+    // [hwcap]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/uapi/asm/hwcap.h?h=v6.15
+    // NOTE(review): `expect` panics if the auxiliary vector cannot be read.
+    let auxv = auxvec::auxv().expect("read auxvec"); // should not fail on RISC-V platform
+    // Each single-letter extension is tested at bit index `letter - 'a'`.
+    let mut has_i = bit::test(auxv.hwcap, (b'i' - b'a').into());
+    #[allow(clippy::eq_op)]
+    enable_feature(Feature::a, bit::test(auxv.hwcap, (b'a' - b'a').into()));
+    enable_feature(Feature::c, bit::test(auxv.hwcap, (b'c' - b'a').into()));
+    enable_feature(Feature::d, bit::test(auxv.hwcap, (b'd' - b'a').into()));
+    enable_feature(Feature::f, bit::test(auxv.hwcap, (b'f' - b'a').into()));
+    enable_feature(Feature::m, bit::test(auxv.hwcap, (b'm' - b'a').into()));
+    let has_v = bit::test(auxv.hwcap, (b'v' - b'a').into());
+    // Becomes `true` only if the hwprobe block below runs to its end.
+    let mut is_v_set = false;
+
+    // Use riscv_hwprobe syscall to query more extensions and
+    // performance-related capabilities.
+    'hwprobe: {
+        // Builds the probe array `t` together with an index enum, then defines
+        // `data_mut!()` (the array, mutably) and `query![Name]` (that entry's `get()`).
+        macro_rules! init {
+            { $($name: ident : $key: expr),* $(,)? } => {
+                #[repr(usize)]
+                enum Indices { $($name),* }
+                let mut t = [$(riscv_hwprobe { key: $key, value: 0 }),*];
+                macro_rules! data_mut { () => { &mut t } }
+                macro_rules! query { [$idx: ident] => { t[Indices::$idx as usize].get() } }
+            }
+        }
+        init! {
+            BaseBehavior: RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
+            Extensions: RISCV_HWPROBE_KEY_IMA_EXT_0,
+            MisalignedScalarPerf: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF,
+            MisalignedVectorPerf: RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
+            MisalignedScalarPerfFallback: RISCV_HWPROBE_KEY_CPUPERF_0,
+        };
+        // If the syscall fails, keep only what the auxiliary vector reported.
+        if !_riscv_hwprobe(data_mut!()) {
+            break 'hwprobe;
+        }
+
+        // Query scalar misaligned behavior.
+        if let Some(value) = query![MisalignedScalarPerf] {
+            enable_feature(
+                Feature::unaligned_scalar_mem,
+                value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST,
+            );
+        } else if let Some(value) = query![MisalignedScalarPerfFallback] {
+            // Deprecated method for fallback
+            enable_feature(
+                Feature::unaligned_scalar_mem,
+                value & RISCV_HWPROBE_MISALIGNED_MASK == RISCV_HWPROBE_MISALIGNED_FAST,
+            );
+        }
+
+        // Query vector misaligned behavior.
+        if let Some(value) = query![MisalignedVectorPerf] {
+            enable_feature(
+                Feature::unaligned_vector_mem,
+                value == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST,
+            );
+        }
+
+        // Query whether "I" base and extensions "M" and "A" (as in the ISA
+        // manual version 2.2) are enabled. "I" base at that time corresponds
+        // to "I", "Zicsr", "Zicntr" and "Zifencei" (as in the ISA manual version
+        // 20240411).
+        // This is a current requirement of
+        // `RISCV_HWPROBE_KEY_IMA_EXT_0`-based tests.
+        if query![BaseBehavior].is_none_or(|value| value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA == 0) {
+            break 'hwprobe;
+        }
+        has_i = true;
+        enable_feature(Feature::zicsr, true);
+        enable_feature(Feature::zicntr, true);
+        enable_feature(Feature::zifencei, true);
+        enable_feature(Feature::m, true);
+        enable_feature(Feature::a, true);
+
+        // Enable features based on `RISCV_HWPROBE_KEY_IMA_EXT_0`.
+        let Some(ima_ext_0) = query![Extensions] else {
+            break 'hwprobe;
+        };
+        // `true` when any bit of `mask` is set in the reported extension word.
+        let test = |mask| (ima_ext_0 & mask) != 0;
+
+        enable_feature(Feature::d, test(RISCV_HWPROBE_IMA_FD)); // F is implied.
+        enable_feature(Feature::c, test(RISCV_HWPROBE_IMA_C));
+
+        enable_feature(Feature::zicntr, test(RISCV_HWPROBE_EXT_ZICNTR));
+        enable_feature(Feature::zihpm, test(RISCV_HWPROBE_EXT_ZIHPM));
+
+        enable_feature(Feature::zihintntl, test(RISCV_HWPROBE_EXT_ZIHINTNTL));
+        enable_feature(Feature::zihintpause, test(RISCV_HWPROBE_EXT_ZIHINTPAUSE));
+        enable_feature(Feature::zimop, test(RISCV_HWPROBE_EXT_ZIMOP));
+        enable_feature(Feature::zicbom, test(RISCV_HWPROBE_EXT_ZICBOM));
+        enable_feature(Feature::zicboz, test(RISCV_HWPROBE_EXT_ZICBOZ));
+        enable_feature(Feature::zicond, test(RISCV_HWPROBE_EXT_ZICOND));
+
+        enable_feature(Feature::zalrsc, test(RISCV_HWPROBE_EXT_ZALRSC));
+        enable_feature(Feature::zaamo, test(RISCV_HWPROBE_EXT_ZAAMO));
+        enable_feature(Feature::zawrs, test(RISCV_HWPROBE_EXT_ZAWRS));
+        enable_feature(Feature::zacas, test(RISCV_HWPROBE_EXT_ZACAS));
+        enable_feature(Feature::ztso, test(RISCV_HWPROBE_EXT_ZTSO));
+
+        enable_feature(Feature::zba, test(RISCV_HWPROBE_EXT_ZBA));
+        enable_feature(Feature::zbb, test(RISCV_HWPROBE_EXT_ZBB));
+        enable_feature(Feature::zbs, test(RISCV_HWPROBE_EXT_ZBS));
+        enable_feature(Feature::zbc, test(RISCV_HWPROBE_EXT_ZBC));
+
+        enable_feature(Feature::zbkb, test(RISCV_HWPROBE_EXT_ZBKB));
+        enable_feature(Feature::zbkc, test(RISCV_HWPROBE_EXT_ZBKC));
+        enable_feature(Feature::zbkx, test(RISCV_HWPROBE_EXT_ZBKX));
+        enable_feature(Feature::zknd, test(RISCV_HWPROBE_EXT_ZKND));
+        enable_feature(Feature::zkne, test(RISCV_HWPROBE_EXT_ZKNE));
+        enable_feature(Feature::zknh, test(RISCV_HWPROBE_EXT_ZKNH));
+        enable_feature(Feature::zksed, test(RISCV_HWPROBE_EXT_ZKSED));
+        enable_feature(Feature::zksh, test(RISCV_HWPROBE_EXT_ZKSH));
+        enable_feature(Feature::zkt, test(RISCV_HWPROBE_EXT_ZKT));
+
+        enable_feature(Feature::zcmop, test(RISCV_HWPROBE_EXT_ZCMOP));
+        enable_feature(Feature::zca, test(RISCV_HWPROBE_EXT_ZCA));
+        enable_feature(Feature::zcf, test(RISCV_HWPROBE_EXT_ZCF));
+        enable_feature(Feature::zcd, test(RISCV_HWPROBE_EXT_ZCD));
+        enable_feature(Feature::zcb, test(RISCV_HWPROBE_EXT_ZCB));
+
+        enable_feature(Feature::zfh, test(RISCV_HWPROBE_EXT_ZFH));
+        enable_feature(Feature::zfhmin, test(RISCV_HWPROBE_EXT_ZFHMIN));
+        enable_feature(Feature::zfa, test(RISCV_HWPROBE_EXT_ZFA));
+        enable_feature(Feature::zfbfmin, test(RISCV_HWPROBE_EXT_ZFBFMIN));
+
+        // Use prctl (if any) to determine whether the vector extension
+        // is enabled on the current thread (assuming the entire process
+        // share the same status). If prctl fails (e.g. QEMU userland emulator
+        // as of version 9.2.3), use auxiliary vector to retrieve the default
+        // vector status on the process startup.
+        let has_vectors = {
+            let v_status = unsafe { libc::prctl(PR_RISCV_V_GET_CONTROL) };
+            if v_status >= 0 {
+                (v_status & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) == PR_RISCV_V_VSTATE_CTRL_ON
+            } else {
+                has_v
+            }
+        };
+        if has_vectors {
+            enable_feature(Feature::v, test(RISCV_HWPROBE_IMA_V));
+            enable_feature(Feature::zve32x, test(RISCV_HWPROBE_EXT_ZVE32X));
+            enable_feature(Feature::zve32f, test(RISCV_HWPROBE_EXT_ZVE32F));
+            enable_feature(Feature::zve64x, test(RISCV_HWPROBE_EXT_ZVE64X));
+            enable_feature(Feature::zve64f, test(RISCV_HWPROBE_EXT_ZVE64F));
+            enable_feature(Feature::zve64d, test(RISCV_HWPROBE_EXT_ZVE64D));
+
+            enable_feature(Feature::zvbb, test(RISCV_HWPROBE_EXT_ZVBB));
+            enable_feature(Feature::zvbc, test(RISCV_HWPROBE_EXT_ZVBC));
+            enable_feature(Feature::zvkb, test(RISCV_HWPROBE_EXT_ZVKB));
+            enable_feature(Feature::zvkg, test(RISCV_HWPROBE_EXT_ZVKG));
+            enable_feature(Feature::zvkned, test(RISCV_HWPROBE_EXT_ZVKNED));
+            enable_feature(Feature::zvknha, test(RISCV_HWPROBE_EXT_ZVKNHA));
+            enable_feature(Feature::zvknhb, test(RISCV_HWPROBE_EXT_ZVKNHB));
+            enable_feature(Feature::zvksed, test(RISCV_HWPROBE_EXT_ZVKSED));
+            enable_feature(Feature::zvksh, test(RISCV_HWPROBE_EXT_ZVKSH));
+            enable_feature(Feature::zvkt, test(RISCV_HWPROBE_EXT_ZVKT));
+
+            enable_feature(Feature::zvfh, test(RISCV_HWPROBE_EXT_ZVFH));
+            enable_feature(Feature::zvfhmin, test(RISCV_HWPROBE_EXT_ZVFHMIN));
+            enable_feature(Feature::zvfbfmin, test(RISCV_HWPROBE_EXT_ZVFBFMIN));
+            enable_feature(Feature::zvfbfwma, test(RISCV_HWPROBE_EXT_ZVFBFWMA));
+        }
+        // Reached only when no earlier `break 'hwprobe` fired, i.e. the
+        // fine-grained vector detection above has been carried out.
+        is_v_set = true;
+    };
+
+    // Set V purely depending on the auxiliary vector
+    // only if no fine-grained vector extension detection is available.
+    if !is_v_set {
+        enable_feature(Feature::v, has_v);
+    }
+
+    // Handle base ISA.
+    // If future RV128I is supported, implement with `enable_feature` here.
+    // Note that we should use `target_arch` instead of `target_pointer_width`
+    // to avoid misdetection caused by experimental ABIs such as RV64ILP32.
+    #[cfg(target_arch = "riscv64")]
+    enable_feature(Feature::rv64i, has_i);
+    #[cfg(target_arch = "riscv32")]
+    enable_feature(Feature::rv32i, has_i);
+
+    imply_features(value)
+}
diff --git a/library/std_detect/src/detect/os/linux/s390x.rs b/library/std_detect/src/detect/os/linux/s390x.rs
new file mode 100644
index 00000000000..9b53f526d61
--- /dev/null
+++ b/library/std_detect/src/detect/os/linux/s390x.rs
@@ -0,0 +1,152 @@
+//! Run-time feature detection for s390x on Linux.

use super::auxvec;
use crate::detect::{Feature, bit, cache};

/// Try to read the features from the auxiliary vector
///
/// The auxiliary vector is optional here: if `auxvec::auxv()` fails, detection
/// still proceeds using only the facility list obtained from `stfle`.
pub(crate) fn detect_features() -> cache::Initializer {
    let opt_hwcap: Option<AtHwcap> = auxvec::auxv().ok().map(Into::into);
    let facilities = ExtendedFacilityList::new();
    cache(opt_hwcap, facilities)
}

/// Decoded view of the `hwcap` word from the auxiliary vector.
///
/// Each field mirrors one bit of `hwcap`; the bit index of every field is
/// given by the `From<auxvec::AuxVec>` implementation below (fields are
/// declared in ascending bit order, starting at bit 0).
#[derive(Debug, Default, PartialEq)]
struct AtHwcap {
    esan3: bool,
    zarch: bool,
    stfle: bool,
    msa: bool,
    ldisp: bool,
    eimm: bool,
    dfp: bool,
    hpage: bool,
    etf3eh: bool,
    high_gprs: bool,
    te: bool,
    vxrs: bool,
    vxrs_bcd: bool,
    vxrs_ext: bool,
    gs: bool,
    vxrs_ext2: bool,
    vxrs_pde: bool,
    sort: bool,
    dflt: bool,
    vxrs_pde2: bool,
    nnpa: bool,
    pci_mio: bool,
    sie: bool,
}

impl From<auxvec::AuxVec> for AtHwcap {
    /// Reads AtHwcap from the auxiliary vector.
    ///
    /// Only `auxv.hwcap` is consulted; bits 0 through 22 are decoded.
    fn from(auxv: auxvec::AuxVec) -> Self {
        AtHwcap {
            esan3: bit::test(auxv.hwcap, 0),
            zarch: bit::test(auxv.hwcap, 1),
            stfle: bit::test(auxv.hwcap, 2),
            msa: bit::test(auxv.hwcap, 3),
            ldisp: bit::test(auxv.hwcap, 4),
            eimm: bit::test(auxv.hwcap, 5),
            dfp: bit::test(auxv.hwcap, 6),
            hpage: bit::test(auxv.hwcap, 7),
            etf3eh: bit::test(auxv.hwcap, 8),
            high_gprs: bit::test(auxv.hwcap, 9),
            te: bit::test(auxv.hwcap, 10),
            vxrs: bit::test(auxv.hwcap, 11),
            vxrs_bcd: bit::test(auxv.hwcap, 12),
            vxrs_ext: bit::test(auxv.hwcap, 13),
            gs: bit::test(auxv.hwcap, 14),
            vxrs_ext2: bit::test(auxv.hwcap, 15),
            vxrs_pde: bit::test(auxv.hwcap, 16),
            sort: bit::test(auxv.hwcap, 17),
            dflt: bit::test(auxv.hwcap, 18),
            vxrs_pde2: bit::test(auxv.hwcap, 19),
            nnpa: bit::test(auxv.hwcap, 20),
            pci_mio: bit::test(auxv.hwcap, 21),
            sie: bit::test(auxv.hwcap, 22),
        }
    }
}

/// Facility bits as stored by the `stfle` instruction: four 64-bit words,
/// i.e. facility bits 0 through 255.
struct ExtendedFacilityList([u64; 4]);

impl ExtendedFacilityList {
    /// Executes `stfle` once and captures the facility words it stores.
    ///
    /// The buffer is zero-initialised first, so any word the instruction does
    /// not write reads back as "no facility present".
    fn new() -> Self {
        let mut result: [u64; 4] = [0; 4];
        // SAFETY: rust/llvm only support s390x version with the `stfle` instruction.
        unsafe {
            core::arch::asm!(
                // equivalently ".insn s, 0xb2b00000, 0({1})",
                "stfle 0({})",
                // Address of the 4-word output buffer.
                in(reg_addr) result.as_mut_ptr() ,
                // r0 is loaded with `result.len() - 1` (= 3); the value the
                // instruction leaves in r0 is discarded.
                inout("r0") result.len() as u64 - 1 => _,
                options(nostack)
            );
        }
        Self(result)
    }

    /// Returns whether facility bit `n` is set.
    ///
    /// Indexing panics if `n >= 256`, because the list only holds four words.
    const fn get_bit(&self, n: usize) -> bool {
        // NOTE: bits are numbered from the left.
        // Bit 0 is therefore the most significant bit of word 0.
        self.0[n / 64] & (1 << (63 - (n % 64))) != 0
    }
}

/// Initializes the cache from the feature bits.
///
/// These values are part of the platform-specific [asm/elf.h][kernel], and are a selection of the
/// fields found in the [Facility Indications].
///
/// Every feature is taken from `facilities`. Of `hwcap`, only the `vxrs` bit
/// is consulted: it gates the whole group of vector-related features, so when
/// the auxiliary vector is unavailable (`None`) no vector feature is reported.
///
/// [Facility Indications]: https://www.ibm.com/support/pages/sites/default/files/2021-05/SA22-7871-10.pdf#page=63
/// [kernel]: https://github.com/torvalds/linux/blob/b62cef9a5c673f1b8083159f5dc03c1c5daced2f/arch/s390/include/asm/elf.h#L129
fn cache(hwcap: Option<AtHwcap>, facilities: ExtendedFacilityList) -> cache::Initializer {
    let mut value = cache::Initializer::default();

    {
        // Sets feature `f` in `value` when facility bit `bit_index` is present.
        // The closure mutably borrows `value` until the end of this scope.
        let mut enable_if_set = |bit_index, f| {
            if facilities.get_bit(bit_index) {
                value.set(f as u32);
            }
        };

        // We use HWCAP for `vector` because it requires both hardware and kernel support.
        if let Some(AtHwcap { vxrs: true, .. }) = hwcap {
            // vector and related

            enable_if_set(129, Feature::vector);

            enable_if_set(135, Feature::vector_enhancements_1);
            enable_if_set(148, Feature::vector_enhancements_2);
            enable_if_set(198, Feature::vector_enhancements_3);

            enable_if_set(134, Feature::vector_packed_decimal);
            enable_if_set(152, Feature::vector_packed_decimal_enhancement);
            enable_if_set(192, Feature::vector_packed_decimal_enhancement_2);
            enable_if_set(199, Feature::vector_packed_decimal_enhancement_3);

            enable_if_set(165, Feature::nnp_assist);
        }

        // others

        enable_if_set(76, Feature::message_security_assist_extension3);
        enable_if_set(77, Feature::message_security_assist_extension4);
        enable_if_set(57, Feature::message_security_assist_extension5);
        enable_if_set(146, Feature::message_security_assist_extension8);
        enable_if_set(155, Feature::message_security_assist_extension9);
        enable_if_set(86, Feature::message_security_assist_extension12);

        enable_if_set(58, Feature::miscellaneous_extensions_2);
        enable_if_set(61, Feature::miscellaneous_extensions_3);
        enable_if_set(84, Feature::miscellaneous_extensions_4);

        enable_if_set(45, Feature::high_word);
        enable_if_set(73, Feature::transactional_execution);
        enable_if_set(133, Feature::guarded_storage);
        enable_if_set(150, Feature::enhanced_sort);
        enable_if_set(151, Feature::deflate_conversion);
        enable_if_set(201, Feature::concurrent_functions);
    }

    value
}
diff --git a/library/std_detect/src/detect/os/openbsd/aarch64.rs b/library/std_detect/src/detect/os/openbsd/aarch64.rs new file mode 100644 index 00000000000..cfe4ad10ad6 --- /dev/null +++ b/library/std_detect/src/detect/os/openbsd/aarch64.rs @@ -0,0 +1,55 @@
//! Run-time feature detection for Aarch64 on OpenBSD.
//!
//! OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl.
//! https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
//!
https://github.com/golang/go/commit/cd54ef1f61945459486e9eea2f016d99ef1da925

use crate::detect::cache;
use core::{mem::MaybeUninit, ptr};

// Defined in machine/cpu.h.
// https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/arch/arm64/include/cpu.h#L25-L40
// Each constant is used as the second MIB component after `CTL_MACHDEP`.
const CPU_ID_AA64ISAR0: libc::c_int = 2;
const CPU_ID_AA64ISAR1: libc::c_int = 3;
const CPU_ID_AA64MMFR2: libc::c_int = 7;
const CPU_ID_AA64PFR0: libc::c_int = 8;

/// Try to read the features from the system registers.
///
/// Each register value is fetched through `sysctl` and the results are handed
/// to `super::aarch64::parse_system_registers`. A failed query for ISAR0,
/// ISAR1 or MMFR2 is replaced by `0`; a failed PFR0 query is passed on as
/// `None` instead (see the comment at its call site).
pub(crate) fn detect_features() -> cache::Initializer {
    // ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 are supported on OpenBSD 7.1+.
    // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
    // Others are supported on OpenBSD 7.3+.
    // https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c
    // sysctl returns an unsupported error if operation is not supported,
    // so we can safely use this function on older versions of OpenBSD.
    let aa64isar0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR0]).unwrap_or(0);
    let aa64isar1 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR1]).unwrap_or(0);
    let aa64mmfr2 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64MMFR2]).unwrap_or(0);
    // Do not use unwrap_or(0) because in fp and asimd fields, 0 indicates that
    // the feature is available.
    let aa64pfr0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64PFR0]);

    super::aarch64::parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, aa64pfr0)
}

/// Reads a 64-bit value through `sysctl` for the given MIB path.
///
/// Returns `None` when the call reports failure (`-1`) or when the length
/// written back differs from `size_of::<u64>()`.
#[inline]
fn sysctl64(mib: &[libc::c_int]) -> Option<u64> {
    const OUT_LEN: libc::size_t = core::mem::size_of::<u64>();
    let mut out = MaybeUninit::<u64>::uninit();
    let mut out_len = OUT_LEN;
    // SAFETY: `mib` points to `mib.len()` readable elements, `out` is a
    // writable buffer of `out_len` (= 8) bytes, and no new value is supplied
    // (null pointer with length 0), so this is a read-only query.
    let res = unsafe {
        libc::sysctl(
            mib.as_ptr(),
            mib.len() as libc::c_uint,
            out.as_mut_ptr() as *mut libc::c_void,
            &mut out_len,
            ptr::null_mut(),
            0,
        )
    };
    if res == -1 || out_len != OUT_LEN {
        return None;
    }
    // SAFETY: we've checked that sysctl was successful and `out` was filled.
    Some(unsafe { out.assume_init() })
}
diff --git a/library/std_detect/src/detect/os/other.rs b/library/std_detect/src/detect/os/other.rs new file mode 100644 index 00000000000..091fafc4ebf --- /dev/null +++ b/library/std_detect/src/detect/os/other.rs @@ -0,0 +1,8 @@
//! Other operating systems

use crate::detect::cache;

/// Fallback detection: reports no features at all (a default initializer).
#[allow(dead_code)]
pub(crate) fn detect_features() -> cache::Initializer {
    cache::Initializer::default()
}
diff --git a/library/std_detect/src/detect/os/riscv.rs b/library/std_detect/src/detect/os/riscv.rs new file mode 100644 index 00000000000..4c59ede8029 --- /dev/null +++ b/library/std_detect/src/detect/os/riscv.rs @@ -0,0 +1,203 @@
//! Run-time feature detection utility for RISC-V.
//!
//! On RISC-V, full feature detection needs a help of one or more
//! feature detection mechanisms (usually provided by the operating system).
//!
//! RISC-V architecture defines many extensions and some have dependency to others.
//! More importantly, some of them cannot be enabled without resolving such
//! dependencies due to limited set of features that such mechanisms provide.
//!
//! This module provides an OS-independent utility to process such relations
//! between RISC-V extensions.

use crate::detect::{Feature, cache};

/// Imply features by the given set of enabled features.
///
/// Note that it does not perform any consistency checks including existence of
/// conflicting extensions and/or complicated requirements. Eliminating such
/// inconsistencies is the responsibility of the feature detection logic and
/// its provider(s).
///
/// The rules below are re-applied in a loop until a full pass leaves the
/// flag set unchanged. Rules only ever set flags, never clear them.
pub(crate) fn imply_features(mut value: cache::Initializer) -> cache::Initializer {
    loop {
        // Check convergence of the feature flags later.
        let prev = value;

        // Expect that the optimizer turns repeated operations into
        // a fewer number of bit-manipulation operations.
        macro_rules! imply {
            // Regular implication:
            // A1 => (B1[, B2...]), A2 => (B1[, B2...]) and so on.
            // (A single source feature also matches this arm.)
            ($($from: ident)|+ => $($to: ident)&+) => {
                if [$(Feature::$from as u32),+].iter().any(|&x| value.test(x)) {
                    $(
                        value.set(Feature::$to as u32);
                    )+
                }
            };
            // Implication with multiple requirements:
            // A1 && A2 ... => (B1[, B2...]).
            ($($from: ident)&+ => $($to: ident)&+) => {
                if [$(Feature::$from as u32),+].iter().all(|&x| value.test(x)) {
                    $(
                        value.set(Feature::$to as u32);
                    )+
                }
            };
        }
        macro_rules! group {
            ($group: ident == $($member: ident)&+) => {
                // Forward implication as defined in the specifications.
                imply!($group => $($member)&+);
                // Reverse implication to "group extension" from its members.
                // This is not a part of specifications but convenient for
                // feature detection and implemented in e.g. LLVM.
                imply!($($member)&+ => $group);
            };
        }

        /*
            If a dependency/implication is not explicitly stated in the
            specification, it is denoted as a comment as follows:
            "defined as subset":
                The latter extension is described as a subset of the former
                (but the evidence is weak).
            "functional":
                The former extension is functionally a superset of the latter
                (no direct references though).
        */

        imply!(zvbb => zvkb);

        // Certain set of vector cryptography extensions form a group.
        group!(zvkn == zvkned & zvknhb & zvkb & zvkt);
        group!(zvknc == zvkn & zvbc);
        group!(zvkng == zvkn & zvkg);
        group!(zvks == zvksed & zvksh & zvkb & zvkt);
        group!(zvksc == zvks & zvbc);
        group!(zvksg == zvks & zvkg);

        imply!(zvknhb => zvknha); // functional

        // For vector cryptography, Zvknhb and Zvbc require integer arithmetic
        // with EEW=64 (Zve64x) while others not depending on them
        // require EEW=32 (Zve32x).
        imply!(zvknhb | zvbc => zve64x);
        imply!(zvbb | zvkb | zvkg | zvkned | zvknha | zvksed | zvksh => zve32x);

        imply!(zbc => zbkc); // defined as subset
        group!(zkn == zbkb & zbkc & zbkx & zkne & zknd & zknh);
        group!(zks == zbkb & zbkc & zbkx & zksed & zksh);
        group!(zk == zkn & zkr & zkt);

        imply!(zacas => zaamo);
        group!(a == zalrsc & zaamo);

        group!(b == zba & zbb & zbs);

        imply!(zcf => zca & f);
        imply!(zcd => zca & d);
        imply!(zcmop | zcb => zca);

        imply!(zhinx => zhinxmin);
        imply!(zdinx | zhinxmin => zfinx);

        imply!(zvfh => zvfhmin); // functional
        imply!(zvfh => zve32f & zfhmin);
        imply!(zvfhmin => zve32f);
        imply!(zvfbfwma => zvfbfmin & zfbfmin);
        imply!(zvfbfmin => zve32f);

        imply!(v => zve64d);
        imply!(zve64d => zve64f & d);
        imply!(zve64f => zve64x & zve32f);
        imply!(zve64x => zve32x);
        imply!(zve32f => zve32x & f);

        imply!(zfh => zfhmin);
        imply!(q => d);
        imply!(d | zfhmin | zfa => f);
        imply!(zfbfmin => f); // and some of (not all) "Zfh" instructions.

        // Relatively complex implication rules from the "C" extension.
        imply!(c => zca);
        imply!(c & d => zcd);
        // This rule is only compiled for 32-bit RISC-V targets.
        #[cfg(target_arch = "riscv32")]
        imply!(c & f => zcf);

        imply!(zicntr | zihpm | f | zfinx | zve32x => zicsr);

        // Loop until the feature flags converge.
        if prev == value {
            return value;
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn simple_direct() {
        let mut value = cache::Initializer::default();
        value.set(Feature::f as u32);
        // F (and other extensions with CSRs) -> Zicsr
        assert!(imply_features(value).test(Feature::zicsr as u32));
    }

    #[test]
    fn simple_indirect() {
        let mut value = cache::Initializer::default();
        value.set(Feature::q as u32);
        // Q -> D, D -> F, F -> Zicsr
        assert!(imply_features(value).test(Feature::zicsr as u32));
    }

    #[test]
    fn complex_zcd() {
        let mut value = cache::Initializer::default();
        // C & D -> Zcd
        value.set(Feature::c as u32);
        assert!(!imply_features(value).test(Feature::zcd as u32));
        value.set(Feature::d as u32);
        assert!(imply_features(value).test(Feature::zcd as u32));
    }

    #[test]
    fn group_simple_forward() {
        let mut value = cache::Initializer::default();
        // A -> Zalrsc & Zaamo (forward implication)
        value.set(Feature::a as u32);
        let value = imply_features(value);
        assert!(value.test(Feature::zalrsc as u32));
        assert!(value.test(Feature::zaamo as u32));
    }

    #[test]
    fn group_simple_backward() {
        let mut value = cache::Initializer::default();
        // Zalrsc & Zaamo -> A (reverse implication)
        value.set(Feature::zalrsc as u32);
        value.set(Feature::zaamo as u32);
        assert!(imply_features(value).test(Feature::a as u32));
    }

    #[test]
    fn group_complex_convergence() {
        let mut value = cache::Initializer::default();
        // Needs 3 iterations to converge
        // (and 4th iteration for convergence checking):
        // 1.  [Zvksc] -> Zvks & Zvbc
        // 2.  Zvks -> Zvksed & Zvksh & Zvkb & Zvkt
        // 3a. [Zvkned] & [Zvknhb] & [Zvkb] & Zvkt -> {Zvkn}
        // 3b. Zvkn & Zvbc -> {Zvknc}
        // (Bracketed names are the inputs set below; braced names are the
        // outputs asserted at the end.)
        value.set(Feature::zvksc as u32);
        value.set(Feature::zvkned as u32);
        value.set(Feature::zvknhb as u32);
        value.set(Feature::zvkb as u32);
        let value = imply_features(value);
        assert!(value.test(Feature::zvkn as u32));
        assert!(value.test(Feature::zvknc as u32));
    }
}
diff --git a/library/std_detect/src/detect/os/windows/aarch64.rs b/library/std_detect/src/detect/os/windows/aarch64.rs new file mode 100644 index 00000000000..937f9f26eed --- /dev/null +++ b/library/std_detect/src/detect/os/windows/aarch64.rs @@ -0,0 +1,125 @@
//! Run-time feature detection for Aarch64 on Windows.

use crate::detect::{Feature, cache};

/// Try to read the features using IsProcessorFeaturePresent.
///
/// Each feature is enabled only when the corresponding query returns a
/// non-`FALSE` value; nothing is ever cleared.
pub(crate) fn detect_features() -> cache::Initializer {
    type DWORD = u32;
    type BOOL = i32;

    const FALSE: BOOL = 0;
    // The following Microsoft documentation has not been updated for aarch64.
    // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
    // These are defined in winnt.h of Windows SDK
    // (The commented-out constants below are not queried by this function.)
    const PF_ARM_VFP_32_REGISTERS_AVAILABLE: u32 = 18;
    const PF_ARM_NEON_INSTRUCTIONS_AVAILABLE: u32 = 19;
    const PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE: u32 = 30;
    const PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE: u32 = 31;
    const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: u32 = 34;
    const PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE: u32 = 43;
    const PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE: u32 = 44;
    const PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE: u32 = 45;
    const PF_ARM_SVE_INSTRUCTIONS_AVAILABLE: u32 = 46;
    const PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE: u32 = 47;
    const PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE: u32 = 48;
    const PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE: u32 = 49;
    const PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE: u32 = 50;
    const PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE: u32 = 51;
    // const PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE: u32 = 52;
    // const PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE: u32 = 53;
    const PF_ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE: u32 = 54;
    const PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE: u32 = 55;
    const PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE: u32 = 56;
    // const PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE: u32 = 57;
    // const PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE: u32 = 58;
    // const PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE: u32 = 59;

    unsafe extern "system" {
        fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL;
    }

    let mut value = cache::Initializer::default();
    {
        // Sets `f` in `value` when `enable` is true; mutably borrows `value`
        // until the end of this scope.
        let mut enable_feature = |f, enable| {
            if enable {
                value.set(f as u32);
            }
        };

        // Some features may be supported on current CPU,
        // but no way to detect it by OS API.
        // Also, we require unsafe block for the extern "system" calls.
        unsafe {
            enable_feature(
                Feature::fp,
                IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::asimd,
                IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::crc,
                IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::lse,
                IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::dotprod,
                IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::jsconv,
                IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::rcpc,
                IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::sve,
                IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::sve2,
                IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::sve2p1,
                IsProcessorFeaturePresent(PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            // `sve2_aes` is reported only when both the SVE AES and the
            // SVE PMULL128 queries succeed.
            enable_feature(
                Feature::sve2_aes,
                IsProcessorFeaturePresent(PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE) != FALSE
                    && IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE)
                        != FALSE,
            );
            enable_feature(
                Feature::sve2_bitperm,
                IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::sve_b16b16,
                IsProcessorFeaturePresent(PF_ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::sve2_sha3,
                IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            enable_feature(
                Feature::sve2_sm4,
                IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE) != FALSE,
            );
            // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE means aes, sha1, sha2 and
            // pmull support
            let crypto =
                IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE;
            enable_feature(Feature::aes, crypto);
            enable_feature(Feature::pmull, crypto);
            enable_feature(Feature::sha2, crypto);
        }
    }
    value
}
diff --git a/library/std_detect/src/detect/os/x86.rs b/library/std_detect/src/detect/os/x86.rs new file mode 100644 index 00000000000..8565c2f85e2 --- /dev/null +++ b/library/std_detect/src/detect/os/x86.rs @@ -0,0 +1,335 @@
//! x86 run-time feature detection is OS independent.

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

use core::mem;

use crate::detect::{Feature, bit, cache};

/// Run-time feature detection on x86 works by using the CPUID instruction.
///
/// The [CPUID Wikipedia page][wiki_cpuid] contains
/// all the information about which flags to set to query which values, and in
/// which registers these are reported.
///
/// The definitive references are:
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
///   Instruction Set Reference, A-Z][intel64_ref].
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
///   System Instructions][amd64_ref].
+/// +/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID +/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +#[allow(clippy::similar_names)] +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + if cfg!(target_env = "sgx") { + // doesn't support this because it is untrusted data + return value; + } + + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum + // leaf value for subsequent calls of `cpuinfo` in range [0, + // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars, + // returned in EBX, EDX, and ECX (in that order): + let (max_basic_leaf, vendor_id) = unsafe { + let CpuidResult { + eax: max_basic_leaf, + ebx, + ecx, + edx, + } = __cpuid(0); + let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()]; + let vendor_id: [u8; 12] = mem::transmute(vendor_id); + (max_basic_leaf, vendor_id) + }; + + if max_basic_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; + } + + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { + ecx: proc_info_ecx, + edx: proc_info_edx, + .. + } = unsafe { __cpuid(0x0000_0001_u32) }; + + // EAX = 7: Queries "Extended Features"; + // Contains information about bmi,bmi2, and avx2 support. + let ( + extended_features_ebx, + extended_features_ecx, + extended_features_edx, + extended_features_eax_leaf_1, + extended_features_edx_leaf_1, + ) = if max_basic_leaf >= 7 { + let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; + let CpuidResult { + eax: eax_1, + edx: edx_1, + .. 
+ } = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) }; + (ebx, ecx, edx, eax_1, edx_1) + } else { + (0, 0, 0, 0, 0) // CPUID does not support "Extended Features" + }; + + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let CpuidResult { + eax: extended_max_basic_leaf, + .. + } = unsafe { __cpuid(0x8000_0000_u32) }; + + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature + // Bits" + let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + ecx + } else { + 0 + }; + + { + // borrows value till the end of this scope: + let mut enable = |r, rb, f| { + let present = bit::test(r as usize, rb); + if present { + value.set(f as u32); + } + present + }; + + enable(proc_info_ecx, 0, Feature::sse3); + enable(proc_info_ecx, 1, Feature::pclmulqdq); + enable(proc_info_ecx, 9, Feature::ssse3); + enable(proc_info_ecx, 13, Feature::cmpxchg16b); + enable(proc_info_ecx, 19, Feature::sse4_1); + enable(proc_info_ecx, 20, Feature::sse4_2); + enable(proc_info_ecx, 22, Feature::movbe); + enable(proc_info_ecx, 23, Feature::popcnt); + enable(proc_info_ecx, 25, Feature::aes); + let f16c = enable(proc_info_ecx, 29, Feature::f16c); + enable(proc_info_ecx, 30, Feature::rdrand); + enable(extended_features_ebx, 18, Feature::rdseed); + enable(extended_features_ebx, 19, Feature::adx); + enable(extended_features_ebx, 11, Feature::rtm); + enable(proc_info_edx, 4, Feature::tsc); + enable(proc_info_edx, 23, Feature::mmx); + enable(proc_info_edx, 24, Feature::fxsr); + enable(proc_info_edx, 25, Feature::sse); + enable(proc_info_edx, 26, Feature::sse2); + enable(extended_features_ebx, 29, Feature::sha); + + enable(extended_features_ecx, 8, Feature::gfni); + enable(extended_features_ecx, 9, Feature::vaes); + enable(extended_features_ecx, 10, 
Feature::vpclmulqdq); + + enable(extended_features_ebx, 3, Feature::bmi1); + enable(extended_features_ebx, 8, Feature::bmi2); + + enable(extended_features_ebx, 9, Feature::ermsb); + + enable(extended_features_eax_leaf_1, 31, Feature::movrs); + + // Detect if CPUID.19h available + if bit::test(extended_features_ecx as usize, 23) { + let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) }; + enable(ebx, 0, Feature::kl); + enable(ebx, 2, Feature::widekl); + } + + // `XSAVE` and `AVX` support: + let cpu_xsave = bit::test(proc_info_ecx as usize, 26); + if cpu_xsave { + // 0. Here the CPU supports `XSAVE`. + + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - [intel: is avx enabled?][is_avx_enabled], + // - [mozilla: sse.cpp][mozilla_sse_cpp]. + // + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = bit::test(proc_info_ecx as usize, 27); + + if cpu_osxsave { + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the: + // + // * SSE -> `XCR0.SSE[1]` + // * AVX -> `XCR0.AVX[2]` + // * AVX-512 -> `XCR0.AVX-512[7:5]`. + // * AMX -> `XCR0.AMX[18:17]` + // + // by setting the corresponding bits of `XCR0` to `1`. + // + // This is safe because the CPU supports `xsave` + // and the OS has set `osxsave`. 
+ let xcr0 = unsafe { _xgetbv(0) }; + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: + let os_avx_support = xcr0 & 6 == 6; + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`: + let os_avx512_support = xcr0 & 0xe0 == 0xe0; + // Test `XCR0.AMX[18:17]` with the mask `0b110_0000_0000_0000_0000 == 0x60000` + let os_amx_support = xcr0 & 0x60000 == 0x60000; + + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + if os_avx_support { + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software + // Developer’s Manual, Volume 1: Basic Architecture": + // + // "Software enables the XSAVE feature set by setting + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 + // instruction). If this bit is 0, execution of any of XGETBV, + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV + // causes an invalid-opcode exception (#UD)" + // + enable(proc_info_ecx, 26, Feature::xsave); + + // For `xsaveopt`, `xsavec`, and `xsaves` we need to query: + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, + // ECX = 1): + if max_basic_leaf >= 0xd { + let CpuidResult { + eax: proc_extended_state1_eax, + .. 
+ } = unsafe { __cpuid_count(0xd_u32, 1) }; + enable(proc_extended_state1_eax, 0, Feature::xsaveopt); + enable(proc_extended_state1_eax, 1, Feature::xsavec); + enable(proc_extended_state1_eax, 3, Feature::xsaves); + } + + // FMA (uses 256-bit wide registers): + let fma = enable(proc_info_ecx, 12, Feature::fma); + + // And AVX/AVX2: + enable(proc_info_ecx, 28, Feature::avx); + enable(extended_features_ebx, 5, Feature::avx2); + + // "Short" versions of AVX512 instructions + enable(extended_features_eax_leaf_1, 4, Feature::avxvnni); + enable(extended_features_eax_leaf_1, 23, Feature::avxifma); + enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8); + enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert); + enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16); + + enable(extended_features_eax_leaf_1, 0, Feature::sha512); + enable(extended_features_eax_leaf_1, 1, Feature::sm3); + enable(extended_features_eax_leaf_1, 2, Feature::sm4); + + // For AVX-512 the OS also needs to support saving/restoring + // the extended state, only then we enable AVX-512 support: + // Also, Rust makes `avx512f` imply `fma` and `f16c`, because + // otherwise the assembler is broken. But Intel doesn't guarantee + // that `fma` and `f16c` are available with `avx512f`, so we + // need to check for them separately. 
+ if os_avx512_support && f16c && fma { + enable(extended_features_ebx, 16, Feature::avx512f); + enable(extended_features_ebx, 17, Feature::avx512dq); + enable(extended_features_ebx, 21, Feature::avx512ifma); + enable(extended_features_ebx, 26, Feature::avx512pf); + enable(extended_features_ebx, 27, Feature::avx512er); + enable(extended_features_ebx, 28, Feature::avx512cd); + enable(extended_features_ebx, 30, Feature::avx512bw); + enable(extended_features_ebx, 31, Feature::avx512vl); + enable(extended_features_ecx, 1, Feature::avx512vbmi); + enable(extended_features_ecx, 6, Feature::avx512vbmi2); + enable(extended_features_ecx, 11, Feature::avx512vnni); + enable(extended_features_ecx, 12, Feature::avx512bitalg); + enable(extended_features_ecx, 14, Feature::avx512vpopcntdq); + enable(extended_features_edx, 8, Feature::avx512vp2intersect); + enable(extended_features_edx, 23, Feature::avx512fp16); + enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16); + } + } + + if os_amx_support { + enable(extended_features_edx, 24, Feature::amx_tile); + enable(extended_features_edx, 25, Feature::amx_int8); + enable(extended_features_edx, 22, Feature::amx_bf16); + enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); + enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); + + if max_basic_leaf >= 0x1e { + let CpuidResult { + eax: amx_feature_flags_eax, + .. + } = unsafe { __cpuid_count(0x1e_u32, 1) }; + + enable(amx_feature_flags_eax, 4, Feature::amx_fp8); + enable(amx_feature_flags_eax, 5, Feature::amx_transpose); + enable(amx_feature_flags_eax, 6, Feature::amx_tf32); + enable(amx_feature_flags_eax, 7, Feature::amx_avx512); + enable(amx_feature_flags_eax, 8, Feature::amx_movrs); + } + } + } + } + + // This detects ABM on AMD CPUs and LZCNT on Intel CPUs. + // On intel CPUs with popcnt, lzcnt implements the + // "missing part" of ABM, so we map both to the same + // internal feature. 
+ // + // The `is_x86_feature_detected!("lzcnt")` macro then + // internally maps to Feature::abm. + enable(extended_proc_info_ecx, 5, Feature::lzcnt); + + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series + // number(Family 18h). + // + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD + // family 17h. + // + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. + // Related Hygon kernel patch can be found on + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { + // These features are available on AMD arch CPUs: + enable(extended_proc_info_ecx, 6, Feature::sse4a); + enable(extended_proc_info_ecx, 21, Feature::tbm); + enable(extended_proc_info_ecx, 11, Feature::xop); + } + } + + // Unfortunately, some Skylake chips erroneously report support for BMI1 and + // BMI2 without actual support. These chips don't support AVX, and it seems + // that all Intel chips with non-erroneous support BMI do (I didn't check + // other vendors), so we can disable these flags for chips that don't also + // report support for AVX. + // + // It's possible this will pessimize future chips that do support BMI and + // not AVX, but this seems minor compared to a hard crash you get when + // executing an unsupported instruction (to put it another way, it's safe + // for us to under-report CPU features, but not to over-report them). Still, + // to limit any impact this may have in the future, we only do this for + // Intel chips, as it's a bug only present in their chips. 
+ // + // This bug is documented as `SKL052` in the errata section of this document: + // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf + if vendor_id == *b"GenuineIntel" && !value.test(Feature::avx as u32) { + value.unset(Feature::bmi1 as u32); + value.unset(Feature::bmi2 as u32); + } + + value +} |
