about summary refs log tree commit diff
path: root/library/std_detect/src/detect/os/linux/aarch64.rs
diff options
context:
space:
mode:
Diffstat (limited to 'library/std_detect/src/detect/os/linux/aarch64.rs')
-rw-r--r--library/std_detect/src/detect/os/linux/aarch64.rs484
1 files changed, 484 insertions, 0 deletions
diff --git a/library/std_detect/src/detect/os/linux/aarch64.rs b/library/std_detect/src/detect/os/linux/aarch64.rs
new file mode 100644
index 00000000000..22a9cefff7b
--- /dev/null
+++ b/library/std_detect/src/detect/os/linux/aarch64.rs
@@ -0,0 +1,484 @@
+//! Run-time feature detection for Aarch64 on Linux.
+
+use super::auxvec;
+use crate::detect::{Feature, bit, cache};
+
+/// Try to read the features from the auxiliary vector.
+pub(crate) fn detect_features() -> cache::Initializer {
+    #[cfg(target_os = "android")]
+    let is_exynos9810 = {
+        // Samsung Exynos 9810 has a bug that big and little cores have different
+        // ISAs. And on older Android (pre-9), the kernel incorrectly reports
+        // that features available only on some cores are available on all cores.
+        // https://reviews.llvm.org/D114523
+        let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize];
+        let len = unsafe {
+            libc::__system_property_get(c"ro.arch".as_ptr(), arch.as_mut_ptr() as *mut libc::c_char)
+        };
+        // On Exynos, ro.arch is not available on Android 12+, but it is fine
+        // because Android 9+ includes the fix.
+        len > 0 && arch.starts_with(b"exynos9810")
+    };
+    #[cfg(not(target_os = "android"))]
+    let is_exynos9810 = false;
+
+    if let Ok(auxv) = auxvec::auxv() {
+        let hwcap: AtHwcap = auxv.into();
+        return hwcap.cache(is_exynos9810);
+    }
+    cache::Initializer::default()
+}
+
+/// These values are part of the platform-specific [asm/hwcap.h][hwcap] .
+///
+/// The names match those used for cpuinfo.
+///
+/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
+#[derive(Debug, Default, PartialEq)]
+struct AtHwcap {
+    // AT_HWCAP
+    fp: bool,
+    asimd: bool,
+    // evtstrm: No LLVM support.
+    aes: bool,
+    pmull: bool,
+    sha1: bool,
+    sha2: bool,
+    crc32: bool,
+    atomics: bool,
+    fphp: bool,
+    asimdhp: bool,
+    // cpuid: No LLVM support.
+    asimdrdm: bool,
+    jscvt: bool,
+    fcma: bool,
+    lrcpc: bool,
+    dcpop: bool,
+    sha3: bool,
+    sm3: bool,
+    sm4: bool,
+    asimddp: bool,
+    sha512: bool,
+    sve: bool,
+    fhm: bool,
+    dit: bool,
+    uscat: bool,
+    ilrcpc: bool,
+    flagm: bool,
+    ssbs: bool,
+    sb: bool,
+    paca: bool,
+    pacg: bool,
+
+    // AT_HWCAP2
+    dcpodp: bool,
+    sve2: bool,
+    sveaes: bool,
+    svepmull: bool,
+    svebitperm: bool,
+    svesha3: bool,
+    svesm4: bool,
+    flagm2: bool,
+    frint: bool,
+    // svei8mm: See i8mm feature.
+    svef32mm: bool,
+    svef64mm: bool,
+    // svebf16: See bf16 feature.
+    i8mm: bool,
+    bf16: bool,
+    // dgh: No LLVM support.
+    rng: bool,
+    bti: bool,
+    mte: bool,
+    ecv: bool,
+    // afp: bool,
+    // rpres: bool,
+    // mte3: bool,
+    sme: bool,
+    smei16i64: bool,
+    smef64f64: bool,
+    // smei8i32: bool,
+    // smef16f32: bool,
+    // smeb16f32: bool,
+    // smef32f32: bool,
+    smefa64: bool,
+    wfxt: bool,
+    // ebf16: bool,
+    // sveebf16: bool,
+    cssc: bool,
+    // rprfm: bool,
+    sve2p1: bool,
+    sme2: bool,
+    sme2p1: bool,
+    // smei16i32: bool,
+    // smebi32i32: bool,
+    smeb16b16: bool,
+    smef16f16: bool,
+    mops: bool,
+    hbc: bool,
+    sveb16b16: bool,
+    lrcpc3: bool,
+    lse128: bool,
+    fpmr: bool,
+    lut: bool,
+    faminmax: bool,
+    f8cvt: bool,
+    f8fma: bool,
+    f8dp4: bool,
+    f8dp2: bool,
+    f8e4m3: bool,
+    f8e5m2: bool,
+    smelutv2: bool,
+    smef8f16: bool,
+    smef8f32: bool,
+    smesf8fma: bool,
+    smesf8dp4: bool,
+    smesf8dp2: bool,
+    // pauthlr: bool,
+}
+
+impl From<auxvec::AuxVec> for AtHwcap {
+    /// Reads AtHwcap from the auxiliary vector.
+    fn from(auxv: auxvec::AuxVec) -> Self {
+        AtHwcap {
+            fp: bit::test(auxv.hwcap, 0),
+            asimd: bit::test(auxv.hwcap, 1),
+            // evtstrm: bit::test(auxv.hwcap, 2),
+            aes: bit::test(auxv.hwcap, 3),
+            pmull: bit::test(auxv.hwcap, 4),
+            sha1: bit::test(auxv.hwcap, 5),
+            sha2: bit::test(auxv.hwcap, 6),
+            crc32: bit::test(auxv.hwcap, 7),
+            atomics: bit::test(auxv.hwcap, 8),
+            fphp: bit::test(auxv.hwcap, 9),
+            asimdhp: bit::test(auxv.hwcap, 10),
+            // cpuid: bit::test(auxv.hwcap, 11),
+            asimdrdm: bit::test(auxv.hwcap, 12),
+            jscvt: bit::test(auxv.hwcap, 13),
+            fcma: bit::test(auxv.hwcap, 14),
+            lrcpc: bit::test(auxv.hwcap, 15),
+            dcpop: bit::test(auxv.hwcap, 16),
+            sha3: bit::test(auxv.hwcap, 17),
+            sm3: bit::test(auxv.hwcap, 18),
+            sm4: bit::test(auxv.hwcap, 19),
+            asimddp: bit::test(auxv.hwcap, 20),
+            sha512: bit::test(auxv.hwcap, 21),
+            sve: bit::test(auxv.hwcap, 22),
+            fhm: bit::test(auxv.hwcap, 23),
+            dit: bit::test(auxv.hwcap, 24),
+            uscat: bit::test(auxv.hwcap, 25),
+            ilrcpc: bit::test(auxv.hwcap, 26),
+            flagm: bit::test(auxv.hwcap, 27),
+            ssbs: bit::test(auxv.hwcap, 28),
+            sb: bit::test(auxv.hwcap, 29),
+            paca: bit::test(auxv.hwcap, 30),
+            pacg: bit::test(auxv.hwcap, 31),
+
+            // AT_HWCAP2
+            dcpodp: bit::test(auxv.hwcap2, 0),
+            sve2: bit::test(auxv.hwcap2, 1),
+            sveaes: bit::test(auxv.hwcap2, 2),
+            svepmull: bit::test(auxv.hwcap2, 3),
+            svebitperm: bit::test(auxv.hwcap2, 4),
+            svesha3: bit::test(auxv.hwcap2, 5),
+            svesm4: bit::test(auxv.hwcap2, 6),
+            flagm2: bit::test(auxv.hwcap2, 7),
+            frint: bit::test(auxv.hwcap2, 8),
+            // svei8mm: bit::test(auxv.hwcap2, 9),
+            svef32mm: bit::test(auxv.hwcap2, 10),
+            svef64mm: bit::test(auxv.hwcap2, 11),
+            // svebf16: bit::test(auxv.hwcap2, 12),
+            i8mm: bit::test(auxv.hwcap2, 13),
+            bf16: bit::test(auxv.hwcap2, 14),
+            // dgh: bit::test(auxv.hwcap2, 15),
+            rng: bit::test(auxv.hwcap2, 16),
+            bti: bit::test(auxv.hwcap2, 17),
+            mte: bit::test(auxv.hwcap2, 18),
+            ecv: bit::test(auxv.hwcap2, 19),
+            // afp: bit::test(auxv.hwcap2, 20),
+            // rpres: bit::test(auxv.hwcap2, 21),
+            // mte3: bit::test(auxv.hwcap2, 22),
+            sme: bit::test(auxv.hwcap2, 23),
+            smei16i64: bit::test(auxv.hwcap2, 24),
+            smef64f64: bit::test(auxv.hwcap2, 25),
+            // smei8i32: bit::test(auxv.hwcap2, 26),
+            // smef16f32: bit::test(auxv.hwcap2, 27),
+            // smeb16f32: bit::test(auxv.hwcap2, 28),
+            // smef32f32: bit::test(auxv.hwcap2, 29),
+            smefa64: bit::test(auxv.hwcap2, 30),
+            wfxt: bit::test(auxv.hwcap2, 31),
+            // ebf16: bit::test(auxv.hwcap2, 32),
+            // sveebf16: bit::test(auxv.hwcap2, 33),
+            cssc: bit::test(auxv.hwcap2, 34),
+            // rprfm: bit::test(auxv.hwcap2, 35),
+            sve2p1: bit::test(auxv.hwcap2, 36),
+            sme2: bit::test(auxv.hwcap2, 37),
+            sme2p1: bit::test(auxv.hwcap2, 38),
+            // smei16i32: bit::test(auxv.hwcap2, 39),
+            // smebi32i32: bit::test(auxv.hwcap2, 40),
+            smeb16b16: bit::test(auxv.hwcap2, 41),
+            smef16f16: bit::test(auxv.hwcap2, 42),
+            mops: bit::test(auxv.hwcap2, 43),
+            hbc: bit::test(auxv.hwcap2, 44),
+            sveb16b16: bit::test(auxv.hwcap2, 45),
+            lrcpc3: bit::test(auxv.hwcap2, 46),
+            lse128: bit::test(auxv.hwcap2, 47),
+            fpmr: bit::test(auxv.hwcap2, 48),
+            lut: bit::test(auxv.hwcap2, 49),
+            faminmax: bit::test(auxv.hwcap2, 50),
+            f8cvt: bit::test(auxv.hwcap2, 51),
+            f8fma: bit::test(auxv.hwcap2, 52),
+            f8dp4: bit::test(auxv.hwcap2, 53),
+            f8dp2: bit::test(auxv.hwcap2, 54),
+            f8e4m3: bit::test(auxv.hwcap2, 55),
+            f8e5m2: bit::test(auxv.hwcap2, 56),
+            smelutv2: bit::test(auxv.hwcap2, 57),
+            smef8f16: bit::test(auxv.hwcap2, 58),
+            smef8f32: bit::test(auxv.hwcap2, 59),
+            smesf8fma: bit::test(auxv.hwcap2, 60),
+            smesf8dp4: bit::test(auxv.hwcap2, 61),
+            smesf8dp2: bit::test(auxv.hwcap2, 62),
+            // pauthlr: bit::test(auxv.hwcap2, ??),
+        }
+    }
+}
+
+impl AtHwcap {
+    /// Initializes the cache from the feature -bits.
+    ///
+    /// The feature dependencies here come directly from LLVM's feature definitions:
+    /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td
+    fn cache(self, is_exynos9810: bool) -> cache::Initializer {
+        let mut value = cache::Initializer::default();
+        {
+            let mut enable_feature = |f, enable| {
+                if enable {
+                    value.set(f as u32);
+                }
+            };
+
+            // Samsung Exynos 9810 has a bug that big and little cores have different
+            // ISAs. And on older Android (pre-9), the kernel incorrectly reports
+            // that features available only on some cores are available on all cores.
+            // So, only check features that are known to be available on exynos-m3:
+            // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature
+            // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342.
+            if is_exynos9810 {
+                enable_feature(Feature::fp, self.fp);
+                enable_feature(Feature::crc, self.crc32);
+                // ASIMD support requires float support - if half-floats are
+                // supported, it also requires half-float support:
+                let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp);
+                enable_feature(Feature::asimd, asimd);
+                // Cryptographic extensions require ASIMD
+                // AES also covers FEAT_PMULL
+                enable_feature(Feature::aes, self.aes && self.pmull && asimd);
+                enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd);
+                return value;
+            }
+
+            enable_feature(Feature::fp, self.fp);
+            // Half-float support requires float support
+            enable_feature(Feature::fp16, self.fp && self.fphp);
+            // FHM (fp16fml in LLVM) requires half float support
+            enable_feature(Feature::fhm, self.fphp && self.fhm);
+            enable_feature(Feature::pmull, self.pmull);
+            enable_feature(Feature::crc, self.crc32);
+            enable_feature(Feature::lse, self.atomics);
+            enable_feature(Feature::lse2, self.uscat);
+            enable_feature(Feature::lse128, self.lse128 && self.atomics);
+            enable_feature(Feature::rcpc, self.lrcpc);
+            // RCPC2 (rcpc-immo in LLVM) requires RCPC support
+            let rcpc2 = self.ilrcpc && self.lrcpc;
+            enable_feature(Feature::rcpc2, rcpc2);
+            enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2);
+            enable_feature(Feature::dit, self.dit);
+            enable_feature(Feature::flagm, self.flagm);
+            enable_feature(Feature::flagm2, self.flagm2);
+            enable_feature(Feature::ssbs, self.ssbs);
+            enable_feature(Feature::sb, self.sb);
+            enable_feature(Feature::paca, self.paca);
+            enable_feature(Feature::pacg, self.pacg);
+            // enable_feature(Feature::pauth_lr, self.pauthlr);
+            enable_feature(Feature::dpb, self.dcpop);
+            enable_feature(Feature::dpb2, self.dcpodp);
+            enable_feature(Feature::rand, self.rng);
+            enable_feature(Feature::bti, self.bti);
+            enable_feature(Feature::mte, self.mte);
+            // jsconv requires float support
+            enable_feature(Feature::jsconv, self.jscvt && self.fp);
+            enable_feature(Feature::rdm, self.asimdrdm);
+            enable_feature(Feature::dotprod, self.asimddp);
+            enable_feature(Feature::frintts, self.frint);
+
+            // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes
+            // separately. We ignore that distinction here.
+            enable_feature(Feature::i8mm, self.i8mm);
+            enable_feature(Feature::bf16, self.bf16);
+
+            // ASIMD support requires float support - if half-floats are
+            // supported, it also requires half-float support:
+            let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp);
+            enable_feature(Feature::asimd, asimd);
+            // ASIMD extensions require ASIMD support:
+            enable_feature(Feature::fcma, self.fcma && asimd);
+            enable_feature(Feature::sve, self.sve && asimd);
+
+            // SVE extensions require SVE & ASIMD
+            enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd);
+            enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd);
+
+            // Cryptographic extensions require ASIMD
+            enable_feature(Feature::aes, self.aes && asimd);
+            enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd);
+            // SHA512/SHA3 require SHA1 & SHA256
+            enable_feature(
+                Feature::sha3,
+                self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd,
+            );
+            enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd);
+
+            // SVE2 requires SVE
+            let sve2 = self.sve2 && self.sve && asimd;
+            enable_feature(Feature::sve2, sve2);
+            enable_feature(Feature::sve2p1, self.sve2p1 && sve2);
+            // SVE2 extensions require SVE2 and crypto features
+            enable_feature(
+                Feature::sve2_aes,
+                self.sveaes && self.svepmull && sve2 && self.aes,
+            );
+            enable_feature(
+                Feature::sve2_sm4,
+                self.svesm4 && sve2 && self.sm3 && self.sm4,
+            );
+            enable_feature(
+                Feature::sve2_sha3,
+                self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2,
+            );
+            enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2);
+            enable_feature(Feature::sve_b16b16, self.bf16 && self.sveb16b16);
+            enable_feature(Feature::hbc, self.hbc);
+            enable_feature(Feature::mops, self.mops);
+            enable_feature(Feature::ecv, self.ecv);
+            enable_feature(Feature::lut, self.lut);
+            enable_feature(Feature::cssc, self.cssc);
+            enable_feature(Feature::fpmr, self.fpmr);
+            enable_feature(Feature::faminmax, self.faminmax);
+            let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16;
+            enable_feature(Feature::fp8, fp8);
+            let fp8fma = self.f8fma && fp8;
+            enable_feature(Feature::fp8fma, fp8fma);
+            let fp8dot4 = self.f8dp4 && fp8fma;
+            enable_feature(Feature::fp8dot4, fp8dot4);
+            enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4);
+            enable_feature(Feature::wfxt, self.wfxt);
+            let sme = self.sme && self.bf16;
+            enable_feature(Feature::sme, sme);
+            enable_feature(Feature::sme_i16i64, self.smei16i64 && sme);
+            enable_feature(Feature::sme_f64f64, self.smef64f64 && sme);
+            enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2);
+            let sme2 = self.sme2 && sme;
+            enable_feature(Feature::sme2, sme2);
+            enable_feature(Feature::sme2p1, self.sme2p1 && sme2);
+            enable_feature(
+                Feature::sme_b16b16,
+                sme2 && self.bf16 && self.sveb16b16 && self.smeb16b16,
+            );
+            enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2);
+            enable_feature(Feature::sme_lutv2, self.smelutv2);
+            let sme_f8f32 = self.smef8f32 && sme2 && fp8;
+            enable_feature(Feature::sme_f8f32, sme_f8f32);
+            enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32);
+            let ssve_fp8fma = self.smesf8fma && sme2 && fp8;
+            enable_feature(Feature::ssve_fp8fma, ssve_fp8fma);
+            let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma;
+            enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4);
+            enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4);
+        }
+        value
+    }
+}
+
+#[cfg(target_endian = "little")]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[cfg(feature = "std_detect_file_io")]
+    mod auxv_from_file {
+        use super::auxvec::auxv_from_file;
+        use super::*;
+        // The baseline hwcaps used in the (artificial) auxv test files.
+        fn baseline_hwcaps() -> AtHwcap {
+            AtHwcap {
+                fp: true,
+                asimd: true,
+                aes: true,
+                pmull: true,
+                sha1: true,
+                sha2: true,
+                crc32: true,
+                atomics: true,
+                fphp: true,
+                asimdhp: true,
+                asimdrdm: true,
+                lrcpc: true,
+                dcpop: true,
+                asimddp: true,
+                ssbs: true,
+                ..AtHwcap::default()
+            }
+        }
+
+        #[test]
+        fn linux_empty_hwcap2_aarch64() {
+            let file = concat!(
+                env!("CARGO_MANIFEST_DIR"),
+                "/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv"
+            );
+            println!("file: {file}");
+            let v = auxv_from_file(file).unwrap();
+            println!("HWCAP : 0x{:0x}", v.hwcap);
+            println!("HWCAP2: 0x{:0x}", v.hwcap2);
+            assert_eq!(AtHwcap::from(v), baseline_hwcaps());
+        }
+        #[test]
+        fn linux_no_hwcap2_aarch64() {
+            let file = concat!(
+                env!("CARGO_MANIFEST_DIR"),
+                "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv"
+            );
+            println!("file: {file}");
+            let v = auxv_from_file(file).unwrap();
+            println!("HWCAP : 0x{:0x}", v.hwcap);
+            println!("HWCAP2: 0x{:0x}", v.hwcap2);
+            assert_eq!(AtHwcap::from(v), baseline_hwcaps());
+        }
+        #[test]
+        fn linux_hwcap2_aarch64() {
+            let file = concat!(
+                env!("CARGO_MANIFEST_DIR"),
+                "/src/detect/test_data/linux-hwcap2-aarch64.auxv"
+            );
+            println!("file: {file}");
+            let v = auxv_from_file(file).unwrap();
+            println!("HWCAP : 0x{:0x}", v.hwcap);
+            println!("HWCAP2: 0x{:0x}", v.hwcap2);
+            assert_eq!(
+                AtHwcap::from(v),
+                AtHwcap {
+                    // Some other HWCAP bits.
+                    paca: true,
+                    pacg: true,
+                    // HWCAP2-only bits.
+                    dcpodp: true,
+                    frint: true,
+                    rng: true,
+                    bti: true,
+                    mte: true,
+                    ..baseline_hwcaps()
+                }
+            );
+        }
+    }
+}