diff options
| author | Jubilee <46493976+workingjubilee@users.noreply.github.com> | 2021-06-23 20:47:51 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-06-23 20:47:51 -0700 |
| commit | b5ba19577f1c463b4482a6a5759a9475006e50dd (patch) | |
| tree | 0fba7c5834be7515ccf78405cdd2a391e6075731 | |
| parent | 3872723ead2e4784ab483c289160d8d14086c037 (diff) | |
| parent | 1529ed43d8b029c0406d636a6d2ae84bf5346aca (diff) | |
| download | rust-b5ba19577f1c463b4482a6a5759a9475006e50dd.tar.gz rust-b5ba19577f1c463b4482a6a5759a9475006e50dd.zip | |
Merge pull request #139 from rust-lang/feat/gather
Add SimdArray trait and safe gather/scatter API (rust-lang/stdsimd#139)
This PR has four interdependent parts; none of them makes much sense without the others:
- The introduction of the SimdArray trait for abstraction over vectors.
- The implementation of private vector-of-pointers types.
- Using these to allow constructing vectors with SimdArray::gather_{or, or_default, select}.
- Using these to allow writing vectors using SimdArray::scatter{,_select}.
| -rw-r--r-- | crates/core_simd/src/array.rs | 248 | ||||
| -rw-r--r-- | crates/core_simd/src/intrinsics.rs | 5 | ||||
| -rw-r--r-- | crates/core_simd/src/lib.rs | 3 | ||||
| -rw-r--r-- | crates/core_simd/src/vector.rs | 3 | ||||
| -rw-r--r-- | crates/core_simd/src/vector/ptr.rs | 55 |
5 files changed, 313 insertions, 1 deletions
diff --git a/crates/core_simd/src/array.rs b/crates/core_simd/src/array.rs new file mode 100644 index 00000000000..0a52876e55b --- /dev/null +++ b/crates/core_simd/src/array.rs @@ -0,0 +1,248 @@ +use crate::intrinsics; +use crate::masks::*; +use crate::vector::ptr::{SimdConstPtr, SimdMutPtr}; +use crate::vector::*; + +/// A representation of a vector as an "array" with indices, implementing +/// operations applicable to any vector type based solely on "having lanes", +/// and describing relationships between vector and scalar types. +pub trait SimdArray<const LANES: usize>: crate::LanesAtMost32 +where + SimdUsize<LANES>: crate::LanesAtMost32, + SimdIsize<LANES>: crate::LanesAtMost32, + MaskSize<LANES>: crate::Mask, + Self: Sized, +{ + /// The scalar type in every lane of this vector type. + type Scalar: Copy + Sized; + /// The number of lanes for this vector. + const LANES: usize = LANES; + + /// Generates a SIMD vector with the same value in every lane. + #[must_use] + fn splat(val: Self::Scalar) -> Self; + + /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices. + /// If an index is out of bounds, that lane instead selects the value from the "or" vector. + /// ``` + /// # use core_simd::*; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]); + /// let alt = SimdI32::from_array([-5, -4, -3, -2]); + /// + /// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds. + /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15])); + /// ``` + #[must_use] + #[inline] + fn gather_or(slice: &[Self::Scalar], idxs: SimdUsize<LANES>, or: Self) -> Self { + Self::gather_select(slice, MaskSize::splat(true), idxs, or) + } + + /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices. + /// Out-of-bounds indices instead use the default value for that lane (0). 
+ /// ``` + /// # use core_simd::*; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]); + /// + /// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds. + /// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15])); + /// ``` + #[must_use] + #[inline] + fn gather_or_default(slice: &[Self::Scalar], idxs: SimdUsize<LANES>) -> Self + where + Self::Scalar: Default, + { + Self::gather_or(slice, idxs, Self::splat(Self::Scalar::default())) + } + + /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices. + /// Out-of-bounds or masked indices instead select the value from the "or" vector. + /// ``` + /// # use core_simd::*; + /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]); + /// let alt = SimdI32::from_array([-5, -4, -3, -2]); + /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane. + /// + /// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds. + /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2])); + /// ``` + #[must_use] + #[inline] + fn gather_select( + slice: &[Self::Scalar], + mask: MaskSize<LANES>, + idxs: SimdUsize<LANES>, + or: Self, + ) -> Self { + let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int(); + let base_ptr = SimdConstPtr::splat(slice.as_ptr()); + // Ferris forgive me, I have done pointer arithmetic here. + let ptrs = base_ptr.wrapping_add(idxs); + // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah + unsafe { intrinsics::simd_gather(or, ptrs, mask) } + } + + /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices. + /// Out-of-bounds indices are not written. 
+ /// `scatter` writes "in order", so if an index receives two writes, only the last is guaranteed. + /// ``` + /// # use core_simd::*; + /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]); + /// let vals = SimdI32::from_array([-27, 82, -41, 124]); + /// + /// vals.scatter(&mut vec, idxs); // index 0 receives two writes. + /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]); + /// ``` + #[inline] + fn scatter(self, slice: &mut [Self::Scalar], idxs: SimdUsize<LANES>) { + self.scatter_select(slice, MaskSize::splat(true), idxs) + } + + /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices. + /// Out-of-bounds or masked indices are not written. + /// `scatter_select` writes "in order", so if an index receives two writes, only the last is guaranteed. + /// ``` + /// # use core_simd::*; + /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]); + /// let vals = SimdI32::from_array([-27, 82, -41, 124]); + /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane. + /// + /// vals.scatter_select(&mut vec, mask, idxs); // index 0's second write is masked, thus omitted. + /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]); + /// ``` + #[inline] + fn scatter_select( + self, + slice: &mut [Self::Scalar], + mask: MaskSize<LANES>, + idxs: SimdUsize<LANES>, + ) { + // We must construct our scatter mask before we derive a pointer! + let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int(); + // SAFETY: This block works with *mut T derived from &mut 'a [T], + // which means it is delicate in Rust's borrowing model, circa 2021: + // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts! 
+ // Even though this block is largely safe methods, it must be almost exactly this way + // to prevent invalidating the raw ptrs while they're live. + // Thus, entering this block requires all values to use being already ready: + // 0. idxs we want to write to, which are used to construct the mask. + // 1. mask, which depends on an initial &'a [T] and the idxs. + // 2. actual values to scatter (self). + // 3. &mut [T] which will become our base ptr. + unsafe { + // Now Entering ☢️ *mut T Zone + let base_ptr = SimdMutPtr::splat(slice.as_mut_ptr()); + // Ferris forgive me, I have done pointer arithmetic here. + let ptrs = base_ptr.wrapping_add(idxs); + // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah + intrinsics::simd_scatter(self, ptrs, mask) + // Cleared ☢️ *mut T Zone + } + } +} + +macro_rules! impl_simdarray_for { + ($simd:ident {type Scalar = $scalar:ident;}) => { + impl<const LANES: usize> SimdArray<LANES> for $simd<LANES> + where SimdUsize<LANES>: crate::LanesAtMost32, + SimdIsize<LANES>: crate::LanesAtMost32, + MaskSize<LANES>: crate::Mask, + Self: crate::LanesAtMost32, + { + type Scalar = $scalar; + + #[must_use] + #[inline] + fn splat(val: Self::Scalar) -> Self { + [val; LANES].into() + } + } + }; + + ($simd:ident $impl:tt) => { + impl<const LANES: usize> SimdArray<LANES> for $simd<LANES> + where SimdUsize<LANES>: crate::LanesAtMost32, + SimdIsize<LANES>: crate::LanesAtMost32, + MaskSize<LANES>: crate::Mask, + Self: crate::LanesAtMost32, + $impl + } +} + +impl_simdarray_for! { + SimdUsize { + type Scalar = usize; + } +} + +impl_simdarray_for! { + SimdIsize { + type Scalar = isize; + } +} + +impl_simdarray_for! { + SimdI8 { + type Scalar = i8; + } +} + +impl_simdarray_for! { + SimdI16 { + type Scalar = i16; + } +} + +impl_simdarray_for! { + SimdI32 { + type Scalar = i32; + } +} + +impl_simdarray_for! { + SimdI64 { + type Scalar = i64; + } +} + +impl_simdarray_for! { + SimdU8 { + type Scalar = u8; + } +} + +impl_simdarray_for! 
{ + SimdU16 { + type Scalar = u16; + } +} + +impl_simdarray_for! { + SimdU32 { + type Scalar = u32; + } +} + +impl_simdarray_for! { + SimdU64 { + type Scalar = u64; + } +} + +impl_simdarray_for! { + SimdF32 { + type Scalar = f32; + } +} + +impl_simdarray_for! { + SimdF64 { + type Scalar = f64; + } +} diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 3983beb82ec..944026c080a 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -45,7 +45,7 @@ extern "platform-intrinsic" { /// fabs pub(crate) fn simd_fabs<T>(x: T) -> T; - + /// fsqrt pub(crate) fn simd_fsqrt<T>(x: T) -> T; @@ -66,6 +66,9 @@ extern "platform-intrinsic" { pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U; pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U; + pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T; + pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V); + // {s,u}add.sat pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T; diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 5b74d1d574a..8d963e86d12 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -36,3 +36,6 @@ pub use masks::*; mod vector; pub use vector::*; + +mod array; +pub use array::SimdArray; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 95534384b70..95214ea8864 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -5,3 +5,6 @@ mod uint; pub use float::*; pub use int::*; pub use uint::*; + +// Vectors of pointers are not for public use at the current time. +pub(crate) mod ptr; diff --git a/crates/core_simd/src/vector/ptr.rs b/crates/core_simd/src/vector/ptr.rs new file mode 100644 index 00000000000..30bef038b33 --- /dev/null +++ b/crates/core_simd/src/vector/ptr.rs @@ -0,0 +1,55 @@ +//! Private implementation details of public gather/scatter APIs. 
+use crate::SimdUsize; +use core::mem; + +/// A vector of *const T. +#[derive(Debug, Copy, Clone)] +#[repr(simd)] +pub(crate) struct SimdConstPtr<T, const LANES: usize>([*const T; LANES]); + +impl<T, const LANES: usize> SimdConstPtr<T, LANES> +where + SimdUsize<LANES>: crate::LanesAtMost32, + T: Sized, +{ + #[inline] + #[must_use] + pub fn splat(ptr: *const T) -> Self { + Self([ptr; LANES]) + } + + #[inline] + #[must_use] + pub fn wrapping_add(self, addend: SimdUsize<LANES>) -> Self { + unsafe { + let x: SimdUsize<LANES> = mem::transmute_copy(&self); + mem::transmute_copy(&{ x + (addend * mem::size_of::<T>()) }) + } + } +} + +/// A vector of *mut T. Be very careful around potential aliasing. +#[derive(Debug, Copy, Clone)] +#[repr(simd)] +pub(crate) struct SimdMutPtr<T, const LANES: usize>([*mut T; LANES]); + +impl<T, const LANES: usize> SimdMutPtr<T, LANES> +where + SimdUsize<LANES>: crate::LanesAtMost32, + T: Sized, +{ + #[inline] + #[must_use] + pub fn splat(ptr: *mut T) -> Self { + Self([ptr; LANES]) + } + + #[inline] + #[must_use] + pub fn wrapping_add(self, addend: SimdUsize<LANES>) -> Self { + unsafe { + let x: SimdUsize<LANES> = mem::transmute_copy(&self); + mem::transmute_copy(&{ x + (addend * mem::size_of::<T>()) }) + } + } +} |
