diff options
| author | sayantn <sayantn05@gmail.com> | 2025-04-01 23:54:31 +0530 |
|---|---|---|
| committer | Amanieu d'Antras <amanieu@gmail.com> | 2025-04-07 21:29:15 +0000 |
| commit | fbd13bd08c45fd61ac672e533503ed57d0436261 (patch) | |
| tree | 6d8fc6b087eddc6700619828cbbf2deedcaae8fb /library/stdarch/crates/std_detect/src/detect | |
| parent | 97606212ea304651723311e89f47fe022de13480 (diff) | |
| download | rust-fbd13bd08c45fd61ac672e533503ed57d0436261.tar.gz rust-fbd13bd08c45fd61ac672e533503ed57d0436261.zip | |
Add feature detection for new amx variants and movrs
Diffstat (limited to 'library/stdarch/crates/std_detect/src/detect')
| -rw-r--r-- | library/stdarch/crates/std_detect/src/detect/arch/x86.rs | 22 | ||||
| -rw-r--r-- | library/stdarch/crates/std_detect/src/detect/os/x86.rs | 29 |
2 files changed, 42 insertions, 9 deletions
diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 6c45c76e9db..7fdb72bb4ae 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -89,6 +89,11 @@ features! { /// * `"amx-bf16"` /// * `"amx-fp16"` /// * `"amx-complex"` + /// * `"amx-avx512"` + /// * `"amx-fp8"` + /// * `"amx-movrs"` + /// * `"amx-tf32"` + /// * `"amx-transpose"` /// * `"f16c"` /// * `"fma"` /// * `"bmi1"` @@ -109,6 +114,8 @@ features! { /// * `"rtm"` /// * `"movbe"` /// * `"ermsb"` + /// * `"movrs"` + /// * `"xop"` /// /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[stable(feature = "simd_x86", since = "1.27.0")] @@ -177,8 +184,7 @@ features! { @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi: "avx512vbmi"; /// AVX-512 VBMI (Vector Byte Manipulation Instructions) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpopcntdq: "avx512vpopcntdq"; - /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and - /// Quadword) + /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and Quadword) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi2: "avx512vbmi2"; /// AVX-512 VBMI2 (Additional byte, word, dword and qword capabilities) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] gfni: "gfni"; @@ -217,6 +223,16 @@ features! 
{ /// AMX-FP16 (Float16 Operations) @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_complex: "amx-complex"; /// AMX-COMPLEX (Complex number Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_avx512: "amx-avx512"; + /// AMX-AVX512 (AVX512 operations extended to matrices) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp8: "amx-fp8"; + /// AMX-FP8 (Float8 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_movrs: "amx-movrs"; + /// AMX-MOVRS (Matrix MOVRS operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tf32: "amx-tf32"; + /// AMX-TF32 (TensorFloat32 Operations) + @FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose"; + /// AMX-TRANSPOSE (Matrix Transpose Operations) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c"; /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma"; @@ -253,6 +269,8 @@ features! 
{ /// RTM, Intel (Restricted Transactional Memory) @FEATURE: #[stable(feature = "movbe_target_feature", since = "1.67.0")] movbe: "movbe"; /// MOVBE (Move Data After Swapping Bytes) + @FEATURE: #[unstable(feature = "movrs_target_feature", issue = "137976")] movrs: "movrs"; + /// MOVRS (Move data with the read-shared hint) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ermsb: "ermsb"; /// ERMSB, Enhanced REP MOVSB and STOSB @FEATURE: #[unstable(feature = "xop_target_feature", issue = "127208")] xop: "xop"; diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index bb6a44b6438..e48d04ad004 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -141,6 +141,8 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(extended_features_ebx, 9, Feature::ermsb); + enable(extended_features_eax_leaf_1, 31, Feature::movrs); + // Detect if CPUID.19h available if bit::test(extended_features_ecx as usize, 23) { let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) }; @@ -250,14 +252,27 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(extended_features_edx, 8, Feature::avx512vp2intersect); enable(extended_features_edx, 23, Feature::avx512fp16); enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16); + } + } + + if os_amx_support { + enable(extended_features_edx, 24, Feature::amx_tile); + enable(extended_features_edx, 25, Feature::amx_int8); + enable(extended_features_edx, 22, Feature::amx_bf16); + enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); + enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); + + if max_basic_leaf >= 0x1e { + let CpuidResult { + eax: amx_feature_flags_eax, + .. 
+ } = unsafe { __cpuid_count(0x1e_u32, 1) }; - if os_amx_support { - enable(extended_features_edx, 24, Feature::amx_tile); - enable(extended_features_edx, 25, Feature::amx_int8); - enable(extended_features_edx, 22, Feature::amx_bf16); - enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); - enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); - } + enable(amx_feature_flags_eax, 4, Feature::amx_fp8); + enable(amx_feature_flags_eax, 5, Feature::amx_transpose); + enable(amx_feature_flags_eax, 6, Feature::amx_tf32); + enable(amx_feature_flags_eax, 7, Feature::amx_avx512); + enable(amx_feature_flags_eax, 8, Feature::amx_movrs); } } } |
