//! Internal `#[repr(simd)]` types #![allow(non_camel_case_types)] macro_rules! simd_ty { ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => { #[repr(simd)] #[derive(Copy, Clone)] pub(crate) struct $id([$elem_type; $len]); #[allow(clippy::use_self)] impl $id { /// A value of this type where all elements are zeroed out. pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() }; #[inline(always)] pub(crate) const fn new($($param_name: $elem_type),*) -> Self { $id([$($param_name),*]) } #[inline(always)] pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self { $id(elements) } // FIXME: Workaround rust@60637 #[inline(always)] pub(crate) fn splat(value: $elem_type) -> Self { #[derive(Copy, Clone)] #[repr(simd)] struct JustOne([$elem_type; 1]); let one = JustOne([value]); // SAFETY: 0 is always in-bounds because we're shuffling // a simd type with exactly one element. unsafe { simd_shuffle!(one, one, [0; $len]) } } /// Extract the element at position `index`. /// `index` is not a constant so this is not efficient! /// Use for testing only. // FIXME: Workaround rust@60637 #[inline(always)] pub(crate) fn extract(&self, index: usize) -> $elem_type { self.as_array()[index] } #[inline] pub(crate) fn as_array(&self) -> &[$elem_type; $len] { let simd_ptr: *const Self = self; let array_ptr: *const [$elem_type; $len] = simd_ptr.cast(); // SAFETY: We can always read the prefix of a simd type as an array. // There might be more padding afterwards for some widths, but // that's not a problem for reading less than that. unsafe { &*array_ptr } } } impl core::cmp::PartialEq for $id { #[inline] fn eq(&self, other: &Self) -> bool { self.as_array() == other.as_array() } } impl core::fmt::Debug for $id { #[inline] fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { debug_simd_finish(f, stringify!($id), self.as_array()) } } } } macro_rules! simd_m_ty { ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => { #[repr(simd)] #[derive(Copy, Clone)] pub(crate) struct $id([$elem_type; $len]); #[allow(clippy::use_self)] impl $id { #[inline(always)] const fn bool_to_internal(x: bool) -> $elem_type { [0 as $elem_type, !(0 as $elem_type)][x as usize] } #[inline(always)] pub(crate) const fn new($($param_name: bool),*) -> Self { $id([$(Self::bool_to_internal($param_name)),*]) } // FIXME: Workaround rust@60637 #[inline(always)] pub(crate) fn splat(value: bool) -> Self { #[derive(Copy, Clone)] #[repr(simd)] struct JustOne([$elem_type; 1]); let one = JustOne([Self::bool_to_internal(value)]); // SAFETY: 0 is always in-bounds because we're shuffling // a simd type with exactly one element. unsafe { simd_shuffle!(one, one, [0; $len]) } } #[inline] pub(crate) fn as_array(&self) -> &[$elem_type; $len] { let simd_ptr: *const Self = self; let array_ptr: *const [$elem_type; $len] = simd_ptr.cast(); // SAFETY: We can always read the prefix of a simd type as an array. // There might be more padding afterwards for some widths, but // that's not a problem for reading less than that. unsafe { &*array_ptr } } } impl core::cmp::PartialEq for $id { #[inline] fn eq(&self, other: &Self) -> bool { self.as_array() == other.as_array() } } impl core::fmt::Debug for $id { #[inline] fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { debug_simd_finish(f, stringify!($id), self.as_array()) } } } } // 16-bit wide types: simd_ty!(u8x2[u8;2]: x0, x1); simd_ty!(i8x2[i8;2]: x0, x1); // 32-bit wide types: simd_ty!(u8x4[u8;4]: x0, x1, x2, x3); simd_ty!(u16x2[u16;2]: x0, x1); simd_ty!(i8x4[i8;4]: x0, x1, x2, x3); simd_ty!(i16x2[i16;2]: x0, x1); // 64-bit wide types: simd_ty!( u8x8[u8;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!(u16x4[u16;4]: x0, x1, x2, x3); simd_ty!(u32x2[u32;2]: x0, x1); simd_ty!(u64x1[u64;1]: x1); simd_ty!( i8x8[i8;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!(i16x4[i16;4]: x0, x1, x2, x3); simd_ty!(i32x2[i32;2]: x0, x1); simd_ty!(i64x1[i64;1]: x1); simd_ty!(f32x2[f32;2]: x0, x1); simd_ty!(f64x1[f64;1]: x1); // 128-bit wide types: simd_ty!( u8x16[u8;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!( u16x8[u16;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!(u32x4[u32;4]: x0, x1, x2, x3); simd_ty!(u64x2[u64;2]: x0, x1); simd_ty!( i8x16[i8;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!( i16x8[i16;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!(i32x4[i32;4]: x0, x1, x2, x3); simd_ty!(i64x2[i64;2]: x0, x1); simd_ty!(f16x4[f16;4]: x0, x1, x2, x3); simd_ty!( f16x8[f16;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!(f32x4[f32;4]: x0, x1, x2, x3); simd_ty!(f64x2[f64;2]: x0, x1); simd_m_ty!( m8x16[i8;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_m_ty!( m16x8[i16;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_m_ty!(m32x4[i32;4]: x0, x1, x2, x3); simd_m_ty!(m64x2[i64;2]: x0, x1); // 256-bit wide types: simd_ty!( u8x32[u8;32]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); simd_ty!( u16x16[u16;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!( u32x8[u32;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!(u64x4[u64;4]: x0, x1, x2, x3); simd_ty!( i8x32[i8;32]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); simd_ty!( i16x16[i16;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!( i32x8[i32;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!(i64x4[i64;4]: x0, x1, x2, x3); simd_ty!( f16x16[f16;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!( f32x8[f32;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!(f64x4[f64;4]: x0, x1, x2, x3); simd_m_ty!( m8x32[i8;32]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); simd_m_ty!( m16x16[i16;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_m_ty!( m32x8[i32;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); // 512-bit wide types: simd_ty!( i8x64[i8;64]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 ); simd_ty!( u8x64[u8;64]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 ); simd_ty!( i16x32[i16;32]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); simd_ty!( u16x32[u16;32]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); simd_ty!( i32x16[i32;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!( u32x16[u32;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!( f16x32[f16;32]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); simd_ty!( f32x16[f32;16]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!( i64x8[i64;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!( u64x8[u64;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); simd_ty!( f64x8[f64;8]: x0, x1, x2, x3, x4, x5, x6, x7 ); // 1024-bit wide types: simd_ty!( u16x64[u16;64]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 ); simd_ty!( i32x32[i32;32]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); simd_ty!( u32x32[u32;32]: x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); /// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they /// were before moving to array-based simd. #[inline] pub(crate) fn debug_simd_finish( formatter: &mut crate::fmt::Formatter<'_>, type_name: &str, array: &[T; N], ) -> crate::fmt::Result { crate::fmt::Formatter::debug_tuple_fields_finish( formatter, type_name, &crate::array::from_fn::<&dyn crate::fmt::Debug, N, _>(|i| &array[i]), ) }