//! Use assembly fma if the `fma` or `fma4` feature is detected at runtime. use core::arch::asm; use super::super::super::generic; use super::detect::{cpu_flags, get_cpu_features}; use crate::support::Round; use crate::support::feature_detect::select_once; pub fn fma(x: f64, y: f64, z: f64) -> f64 { select_once! { sig: fn(x: f64, y: f64, z: f64) -> f64, init: || { let features = get_cpu_features(); if features.contains(cpu_flags::FMA) { fma_with_fma } else if features.contains(cpu_flags::FMA4) { fma_with_fma4 } else { fma_fallback as Func } }, // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked. call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) }, } } pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { select_once! { sig: fn(x: f32, y: f32, z: f32) -> f32, init: || { let features = get_cpu_features(); if features.contains(cpu_flags::FMA) { fmaf_with_fma } else if features.contains(cpu_flags::FMA4) { fmaf_with_fma4 } else { fmaf_fallback as Func } }, // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked. call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) }, } } /// # Safety /// /// Must have +fma available. unsafe fn fma_with_fma(mut x: f64, y: f64, z: f64) -> f64 { debug_assert!(get_cpu_features().contains(cpu_flags::FMA)); // SAFETY: fma is asserted available by precondition, which provides the instruction. No // memory access or side effects. unsafe { asm!( "vfmadd213sd {x}, {y}, {z}", x = inout(xmm_reg) x, y = in(xmm_reg) y, z = in(xmm_reg) z, options(nostack, nomem, pure), ); } x } /// # Safety /// /// Must have +fma available. unsafe fn fmaf_with_fma(mut x: f32, y: f32, z: f32) -> f32 { debug_assert!(get_cpu_features().contains(cpu_flags::FMA)); // SAFETY: fma is asserted available by precondition, which provides the instruction. No // memory access or side effects. unsafe { asm!( "vfmadd213ss {x}, {y}, {z}", x = inout(xmm_reg) x, y = in(xmm_reg) y, z = in(xmm_reg) z, options(nostack, nomem, pure), ); } x } /// # Safety /// /// Must have +fma4 available. unsafe fn fma_with_fma4(mut x: f64, y: f64, z: f64) -> f64 { debug_assert!(get_cpu_features().contains(cpu_flags::FMA4)); // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No // memory access or side effects. unsafe { asm!( "vfmaddsd {x}, {x}, {y}, {z}", x = inout(xmm_reg) x, y = in(xmm_reg) y, z = in(xmm_reg) z, options(nostack, nomem, pure), ); } x } /// # Safety /// /// Must have +fma4 available. unsafe fn fmaf_with_fma4(mut x: f32, y: f32, z: f32) -> f32 { debug_assert!(get_cpu_features().contains(cpu_flags::FMA4)); // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No // memory access or side effects. unsafe { asm!( "vfmaddss {x}, {x}, {y}, {z}", x = inout(xmm_reg) x, y = in(xmm_reg) y, z = in(xmm_reg) z, options(nostack, nomem, pure), ); } x } // FIXME: the `select_implementation` macro should handle arch implementations that want // to use the fallback, so we don't need to recreate the body. fn fma_fallback(x: f64, y: f64, z: f64) -> f64 { generic::fma_round(x, y, z, Round::Nearest).val } fn fmaf_fallback(x: f32, y: f32, z: f32) -> f32 { generic::fma_wide_round(x, y, z, Round::Nearest).val }