diff options
| author | Caleb Zulawski <caleb.zulawski@gmail.com> | 2021-09-15 04:59:03 +0000 |
|---|---|---|
| committer | Jubilee <46493976+workingjubilee@users.noreply.github.com> | 2021-10-11 13:18:59 -0700 |
| commit | 10168fb7c4ab6e8f02627331cc472a7a2b83c11b (patch) | |
| tree | 40fbf2318a5f35bf93110eea3b72f9d34c574e65 | |
| parent | a16b481a08a3d7560f9c92370f18f6ee8c006c9e (diff) | |
| download | rust-10168fb7c4ab6e8f02627331cc472a7a2b83c11b.tar.gz rust-10168fb7c4ab6e8f02627331cc472a7a2b83c11b.zip | |
Add new swizzle API
Expand swizzle API and migrate existing functions. Add rotate_left, rotate_right. Hide implementation details. Add simd_shuffle macro.
| -rw-r--r-- | crates/core_simd/examples/matrix_inversion.rs | 122 | ||||
| -rw-r--r-- | crates/core_simd/src/intrinsics.rs | 6 | ||||
| -rw-r--r-- | crates/core_simd/src/lib.rs | 1 | ||||
| -rw-r--r-- | crates/core_simd/src/mod.rs | 6 | ||||
| -rw-r--r-- | crates/core_simd/src/permute.rs | 154 | ||||
| -rw-r--r-- | crates/core_simd/src/swizzle.rs | 364 | ||||
| -rw-r--r-- | crates/core_simd/tests/permute.rs | 37 | ||||
| -rw-r--r-- | crates/core_simd/tests/swizzle.rs | 62 |
8 files changed, 491 insertions, 261 deletions
diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs index 29bdc512d77..ee8c477b838 100644 --- a/crates/core_simd/examples/matrix_inversion.rs +++ b/crates/core_simd/examples/matrix_inversion.rs @@ -2,6 +2,7 @@ // Code ported from the `packed_simd` crate // Run this code with `cargo test --example matrix_inversion` #![feature(array_chunks, portable_simd)] +use core_simd::Which::*; use core_simd::*; // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^) @@ -163,86 +164,81 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> { let m_2 = f32x4::from_array(m[2]); let m_3 = f32x4::from_array(m[3]); - // 2 argument shuffle, returns an f32x4 - // the first f32x4 is indexes 0..=3 - // the second f32x4 is indexed 4..=7 - let tmp1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_0, m_1); - let row1 = f32x4::shuffle::<{ [0, 1, 4, 5] }>(m_2, m_3); + const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)]; + const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)]; + const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)]; + const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)]; - let row0 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row1); - let row1 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row1, tmp1); + let tmp = simd_shuffle!(m_0, m_1, SHUFFLE01); + let row1 = simd_shuffle!(m_2, m_3, SHUFFLE01); - let tmp1 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_0, m_1); - let row3 = f32x4::shuffle::<{ [2, 3, 6, 7] }>(m_2, m_3); - let row2 = f32x4::shuffle::<{ [0, 2, 4, 6] }>(tmp1, row3); - let row3 = f32x4::shuffle::<{ [1, 3, 5, 7] }>(row3, tmp1); + let row0 = simd_shuffle!(tmp, row1, SHUFFLE02); + let row1 = simd_shuffle!(row1, tmp, SHUFFLE13); - let tmp1 = row2 * row3; - // there's no syntax for a 1 arg shuffle yet, - // so we just pass the same f32x4 twice - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); + let tmp = simd_shuffle!(m_0, m_1, 
SHUFFLE23); + let row3 = simd_shuffle!(m_2, m_3, SHUFFLE23); + let row2 = simd_shuffle!(tmp, row3, SHUFFLE02); + let row3 = simd_shuffle!(row3, tmp, SHUFFLE13); - let minor0 = row1 * tmp1; - let minor1 = row0 * tmp1; - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); - let minor0 = (row1 * tmp1) - minor0; - let minor1 = (row0 * tmp1) - minor1; - let minor1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor1, minor1); + let tmp = (row2 * row3).reverse().rotate_right::<2>(); + let minor0 = row1 * tmp; + let minor1 = row0 * tmp; + let tmp = tmp.rotate_right::<2>(); + let minor0 = (row1 * tmp) - minor0; + let minor1 = (row0 * tmp) - minor1; + let minor1 = minor1.rotate_right::<2>(); - let tmp1 = row1 * row2; - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); - let minor0 = (row3 * tmp1) + minor0; - let minor3 = row0 * tmp1; - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); + let tmp = (row1 * row2).reverse().rotate_right::<2>(); + let minor0 = (row3 * tmp) + minor0; + let minor3 = row0 * tmp; + let tmp = tmp.rotate_right::<2>(); - let minor0 = minor0 - row3 * tmp1; - let minor3 = row0 * tmp1 - minor3; - let minor3 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor3, minor3); + let minor0 = minor0 - row3 * tmp; + let minor3 = row0 * tmp - minor3; + let minor3 = minor3.rotate_right::<2>(); - let tmp1 = row3 * f32x4::shuffle::<{ [2, 3, 0, 1] }>(row1, row1); - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); - let row2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(row2, row2); - let minor0 = row2 * tmp1 + minor0; - let minor2 = row0 * tmp1; - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); - let minor0 = minor0 - row2 * tmp1; - let minor2 = row0 * tmp1 - minor2; - let minor2 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(minor2, minor2); + let tmp = (row3 * row1.rotate_right::<2>()) + .reverse() + .rotate_right::<2>(); + let row2 = row2.rotate_right::<2>(); + let minor0 = row2 * tmp + minor0; + let minor2 = row0 * tmp; + let tmp = tmp.rotate_right::<2>(); 
+ let minor0 = minor0 - row2 * tmp; + let minor2 = row0 * tmp - minor2; + let minor2 = minor2.rotate_right::<2>(); - let tmp1 = row0 * row1; - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); - let minor2 = minor2 + row3 * tmp1; - let minor3 = row2 * tmp1 - minor3; - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); - let minor2 = row3 * tmp1 - minor2; - let minor3 = minor3 - row2 * tmp1; + let tmp = (row0 * row1).reverse().rotate_right::<2>(); + let minor2 = minor2 + row3 * tmp; + let minor3 = row2 * tmp - minor3; + let tmp = tmp.rotate_right::<2>(); + let minor2 = row3 * tmp - minor2; + let minor3 = minor3 - row2 * tmp; - let tmp1 = row0 * row3; - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); - let minor1 = minor1 - row2 * tmp1; - let minor2 = row1 * tmp1 + minor2; - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); - let minor1 = row2 * tmp1 + minor1; - let minor2 = minor2 - row1 * tmp1; + let tmp = (row0 * row3).reverse().rotate_right::<2>(); + let minor1 = minor1 - row2 * tmp; + let minor2 = row1 * tmp + minor2; + let tmp = tmp.rotate_right::<2>(); + let minor1 = row2 * tmp + minor1; + let minor2 = minor2 - row1 * tmp; - let tmp1 = row0 * row2; - let tmp1 = f32x4::shuffle::<{ [1, 0, 3, 2] }>(tmp1, tmp1); - let minor1 = row3 * tmp1 + minor1; - let minor3 = minor3 - row1 * tmp1; - let tmp1 = f32x4::shuffle::<{ [2, 3, 0, 1] }>(tmp1, tmp1); - let minor1 = minor1 - row3 * tmp1; - let minor3 = row1 * tmp1 + minor3; + let tmp = (row0 * row2).reverse().rotate_right::<2>(); + let minor1 = row3 * tmp + minor1; + let minor3 = minor3 - row1 * tmp; + let tmp = tmp.rotate_right::<2>(); + let minor1 = minor1 - row3 * tmp; + let minor3 = row1 * tmp + minor3; let det = row0 * minor0; - let det = f32x4::shuffle::<{ [2, 3, 0, 1] }>(det, det) + det; - let det = f32x4::shuffle::<{ [1, 0, 3, 2] }>(det, det) + det; + let det = det.rotate_right::<2>() + det; + let det = det.reverse().rotate_right::<2>() + det; if det.horizontal_sum() == 0. 
{ return None; } // calculate the reciprocal - let tmp1 = f32x4::splat(1.0) / det; - let det = tmp1 + tmp1 - det * tmp1 * tmp1; + let tmp = f32x4::splat(1.0) / det; + let det = tmp + tmp - det * tmp * tmp; let res0 = minor0 * det; let res1 = minor1 * det; diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 5783950f353..5f55cdf0399 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -54,11 +54,7 @@ extern "platform-intrinsic" { pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U; // shufflevector - pub(crate) fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U; - pub(crate) fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U; - pub(crate) fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U; - pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U; - pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U; + pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V; pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T; pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V); diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 2d5949f8e79..55b8be97e0e 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -3,6 +3,7 @@ #![feature( adt_const_params, const_fn_trait_bound, + const_panic, platform_intrinsics, repr_simd, simd_ffi, diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 5696570d23e..ec874a22389 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -1,8 +1,9 @@ #[macro_use] -mod permute; -#[macro_use] mod reduction; +#[macro_use] +mod swizzle; + pub(crate) mod intrinsics; #[cfg(feature = "generic_const_exprs")] @@ -27,5 +28,6 @@ pub mod simd { pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; pub use crate::core_simd::masks::*; pub use crate::core_simd::select::Select; + pub use crate::core_simd::swizzle::*; pub use 
crate::core_simd::vector::*; } diff --git a/crates/core_simd/src/permute.rs b/crates/core_simd/src/permute.rs deleted file mode 100644 index 3e31c3365e8..00000000000 --- a/crates/core_simd/src/permute.rs +++ /dev/null @@ -1,154 +0,0 @@ -use crate::simd::intrinsics; -use crate::simd::{Simd, SimdElement}; - -macro_rules! impl_shuffle_lane { - { $fn:ident, $n:literal } => { - impl<T> Simd<T, $n> - where - T: SimdElement, - { - /// A const SIMD shuffle that takes 2 SIMD vectors and produces another vector, using - /// the indices in the const parameter. The first or "self" vector will have its lanes - /// indexed from 0, and the second vector will have its first lane indexed at $n. - /// Indices must be in-bounds of either vector at compile time. - /// - /// Some SIMD shuffle instructions can be quite slow, so avoiding them by loading data - /// into the desired patterns in advance is preferred, but shuffles are still faster - /// than storing and reloading from memory. - /// - /// ``` - /// #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; - /// let a = Simd::from_array([1.0, 2.0, 3.0, 4.0]); - /// let b = Simd::from_array([5.0, 6.0, 7.0, 8.0]); - /// const IDXS: [u32; 4] = [4,0,3,7]; - /// let c = Simd::<_, 4>::shuffle::<IDXS>(a,b); - /// assert_eq!(Simd::from_array([5.0, 1.0, 4.0, 8.0]), c); - /// ``` - #[inline] - pub fn shuffle<const IDX: [u32; $n]>(self, second: Self) -> Self { - unsafe { intrinsics::$fn(self, second, IDX) } - } - - /// Reverse the order of the lanes in the vector. - #[inline] - pub fn reverse(self) -> Self { - const fn idx() -> [u32; $n] { - let mut idx = [0u32; $n]; - let mut i = 0; - while i < $n { - idx[i] = ($n - i - 1) as u32; - i += 1; - } - idx - } - self.shuffle::<{ idx() }>(self) - } - - /// Interleave two vectors. - /// - /// Produces two vectors with lanes taken alternately from `self` and `other`. 
- /// - /// The first result contains the first `LANES / 2` lanes from `self` and `other`, - /// alternating, starting with the first lane of `self`. - /// - /// The second result contains the last `LANES / 2` lanes from `self` and `other`, - /// alternating, starting with the lane `LANES / 2` from the start of `self`. - /// - /// This particular permutation is efficient on many architectures. - /// - /// ``` - /// #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; - /// let a = Simd::from_array([0, 1, 2, 3]); - /// let b = Simd::from_array([4, 5, 6, 7]); - /// let (x, y) = a.interleave(b); - /// assert_eq!(x.to_array(), [0, 4, 1, 5]); - /// assert_eq!(y.to_array(), [2, 6, 3, 7]); - /// ``` - #[inline] - pub fn interleave(self, other: Self) -> (Self, Self) { - const fn lo() -> [u32; $n] { - let mut idx = [0u32; $n]; - let mut i = 0; - while i < $n { - let offset = i / 2; - idx[i] = if i % 2 == 0 { - offset - } else { - $n + offset - } as u32; - i += 1; - } - idx - } - const fn hi() -> [u32; $n] { - let mut idx = [0u32; $n]; - let mut i = 0; - while i < $n { - let offset = ($n + i) / 2; - idx[i] = if i % 2 == 0 { - offset - } else { - $n + offset - } as u32; - i += 1; - } - idx - } - (self.shuffle::<{ lo() }>(other), self.shuffle::<{ hi() }>(other)) - } - - /// Deinterleave two vectors. - /// - /// The first result takes every other lane of `self` and then `other`, starting with - /// the first lane. - /// - /// The second result takes every other lane of `self` and then `other`, starting with - /// the second lane. - /// - /// This particular permutation is efficient on many architectures. 
- /// - /// ``` - /// #![feature(portable_simd)] - /// # #[cfg(feature = "std")] use core_simd::Simd; - /// # #[cfg(not(feature = "std"))] use core::simd::Simd; - /// let a = Simd::from_array([0, 4, 1, 5]); - /// let b = Simd::from_array([2, 6, 3, 7]); - /// let (x, y) = a.deinterleave(b); - /// assert_eq!(x.to_array(), [0, 1, 2, 3]); - /// assert_eq!(y.to_array(), [4, 5, 6, 7]); - /// ``` - #[inline] - pub fn deinterleave(self, other: Self) -> (Self, Self) { - const fn even() -> [u32; $n] { - let mut idx = [0u32; $n]; - let mut i = 0; - while i < $n { - idx[i] = 2 * i as u32; - i += 1; - } - idx - } - const fn odd() -> [u32; $n] { - let mut idx = [0u32; $n]; - let mut i = 0; - while i < $n { - idx[i] = 1 + 2 * i as u32; - i += 1; - } - idx - } - (self.shuffle::<{ even() }>(other), self.shuffle::<{ odd() }>(other)) - } - } - } -} - -impl_shuffle_lane! { simd_shuffle2, 2 } -impl_shuffle_lane! { simd_shuffle4, 4 } -impl_shuffle_lane! { simd_shuffle8, 8 } -impl_shuffle_lane! { simd_shuffle16, 16 } -impl_shuffle_lane! { simd_shuffle32, 32 } diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs new file mode 100644 index 00000000000..048945ddffa --- /dev/null +++ b/crates/core_simd/src/swizzle.rs @@ -0,0 +1,364 @@ +use crate::simd::intrinsics; +use crate::{LaneCount, Simd, SimdElement, SupportedLaneCount}; + +/// Rearrange vector elements. +/// +/// A new vector is constructed by specifying the the lanes of the source vector or vectors to use. +/// +/// When shuffling one vector, the indices of the result vector are indicated by a `const` array +/// of `usize`, like [`Swizzle`]. +/// When shuffling two vectors, the indices are indicated by a `const` array of [`Which`], like +/// [`Swizzle2`]. 
+/// +/// # Examples +/// ## One source vector +/// ``` +/// # #![feature(portable_simd)] +/// # use core_simd::{Simd, simd_shuffle}; +/// let v = Simd::<f32, 4>::from_array([0., 1., 2., 3.]); +/// let v = simd_shuffle!(v, [3, 0, 1, 2]); +/// assert_eq!(v.to_array(), [3., 0., 1., 2.]); +/// ``` +/// +/// ## Two source vectors +/// ``` +/// # #![feature(portable_simd)] +/// # use core_simd::{Simd, simd_shuffle, Which}; +/// use Which::*; +/// let a = Simd::<f32, 4>::from_array([0., 1., 2., 3.]); +/// let b = Simd::<f32, 4>::from_array([4., 5., 6., 7.]); +/// let v = simd_shuffle!(a, b, [First(0), First(1), Second(2), Second(3)]); +/// assert_eq!(v.to_array(), [0., 1., 6., 7.]); +/// ``` +#[macro_export] +macro_rules! simd_shuffle { + { + $vector:expr, $index:expr $(,)? + } => { + { + // FIXME this won't work when we are in `core`! + use $crate::Swizzle; + struct Shuffle; + impl Swizzle<{$index.len()}, {$index.len()}> for Shuffle { + const INDEX: [usize; {$index.len()}] = $index; + } + Shuffle::swizzle($vector) + } + }; + { + $first:expr, $second:expr, $index:expr $(,)? + } => { + { + // FIXME this won't work when we are in `core`! + use $crate::{Which, Swizzle2}; + struct Shuffle; + impl Swizzle2<{$index.len()}, {$index.len()}> for Shuffle { + const INDEX: [Which; {$index.len()}] = $index; + } + Shuffle::swizzle2($first, $second) + } + } +} + +/// An index into one of two vectors. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Which { + /// Indexes the first vector. + First(usize), + /// Indexes the second vector. + Second(usize), +} + +/// Create a vector from the elements of another vector. +pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> { + /// Map from the lanes of the input vector to the output vector. + const INDEX: [usize; OUTPUT_LANES]; + + /// Create a new vector from the lanes of `vector`. + /// + /// Lane `i` of the output is `vector[Self::INDEX[i]]`. 
+ fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES> + where + T: SimdElement, + LaneCount<INPUT_LANES>: SupportedLaneCount, + LaneCount<OUTPUT_LANES>: SupportedLaneCount, + { + unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) } + } +} + +/// Create a vector from the elements of two other vectors. +pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> { + /// Map from the lanes of the input vectors to the output vector + const INDEX: [Which; OUTPUT_LANES]; + + /// Create a new vector from the lanes of `first` and `second`. + /// + /// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is + /// `Second(j)`. + fn swizzle2<T>( + first: Simd<T, INPUT_LANES>, + second: Simd<T, INPUT_LANES>, + ) -> Simd<T, OUTPUT_LANES> + where + T: SimdElement, + LaneCount<INPUT_LANES>: SupportedLaneCount, + LaneCount<OUTPUT_LANES>: SupportedLaneCount, + { + unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) } + } +} + +/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here. +trait SwizzleImpl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> { + const INDEX_IMPL: [u32; OUTPUT_LANES]; +} + +impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> SwizzleImpl<INPUT_LANES, OUTPUT_LANES> + for T +where + T: Swizzle<INPUT_LANES, OUTPUT_LANES> + ?Sized, +{ + const INDEX_IMPL: [u32; OUTPUT_LANES] = { + let mut output = [0; OUTPUT_LANES]; + let mut i = 0; + while i < OUTPUT_LANES { + let index = Self::INDEX[i]; + assert!(index as u32 as usize == index); + assert!(index < INPUT_LANES, "source lane exceeds input lane count",); + output[i] = index as u32; + i += 1; + } + output + }; +} + +/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here. 
+trait Swizzle2Impl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> { + const INDEX_IMPL: [u32; OUTPUT_LANES]; +} + +impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> Swizzle2Impl<INPUT_LANES, OUTPUT_LANES> + for T +where + T: Swizzle2<INPUT_LANES, OUTPUT_LANES> + ?Sized, +{ + const INDEX_IMPL: [u32; OUTPUT_LANES] = { + let mut output = [0; OUTPUT_LANES]; + let mut i = 0; + while i < OUTPUT_LANES { + let (offset, index) = match Self::INDEX[i] { + Which::First(index) => (false, index), + Which::Second(index) => (true, index), + }; + assert!(index < INPUT_LANES, "source lane exceeds input lane count",); + + // lanes are indexed by the first vector, then second vector + let index = if offset { index + INPUT_LANES } else { index }; + assert!(index as u32 as usize == index); + output[i] = index as u32; + i += 1; + } + output + }; +} + +impl<T, const LANES: usize> Simd<T, LANES> +where + T: SimdElement, + LaneCount<LANES>: SupportedLaneCount, +{ + /// Reverse the order of the lanes in the vector. + #[inline] + pub fn reverse(self) -> Self { + const fn reverse_index<const LANES: usize>() -> [usize; LANES] { + let mut index = [0; LANES]; + let mut i = 0; + while i < LANES { + index[i] = LANES - i - 1; + i += 1; + } + index + } + + struct Reverse; + + impl<const LANES: usize> Swizzle<LANES, LANES> for Reverse { + const INDEX: [usize; LANES] = reverse_index::<LANES>(); + } + + Reverse::swizzle(self) + } + + /// Rotates the vector such that the first `OFFSET` elements of the slice move to the end + /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_left`, the + /// element previously in lane `OFFSET` will become the first element in the slice. 
+ #[inline] + pub fn rotate_left<const OFFSET: usize>(self) -> Self { + const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] { + let offset = OFFSET % LANES; + let mut index = [0; LANES]; + let mut i = 0; + while i < LANES { + index[i] = (i + offset) % LANES; + i += 1; + } + index + } + + struct Rotate<const OFFSET: usize>; + + impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> { + const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>(); + } + + Rotate::<OFFSET>::swizzle(self) + } + + /// Rotates the vector such that the first `LANES - OFFSET` elements of the vector move to + /// the end while the last `OFFSET` elements move to the front. After calling `rotate_right`, the + /// element previously at index `LANES - OFFSET` will become the first element in the slice. + #[inline] + pub fn rotate_right<const OFFSET: usize>(self) -> Self { + const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] { + let offset = LANES - OFFSET % LANES; + let mut index = [0; LANES]; + let mut i = 0; + while i < LANES { + index[i] = (i + offset) % LANES; + i += 1; + } + index + } + + struct Rotate<const OFFSET: usize>; + + impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> { + const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>(); + } + + Rotate::<OFFSET>::swizzle(self) + } + + /// Interleave two vectors. + /// + /// Produces two vectors with lanes taken alternately from `self` and `other`. + /// + /// The first result contains the first `LANES / 2` lanes from `self` and `other`, + /// alternating, starting with the first lane of `self`. + /// + /// The second result contains the last `LANES / 2` lanes from `self` and `other`, + /// alternating, starting with the lane `LANES / 2` from the start of `self`. + /// + /// This particular permutation is efficient on many architectures. 
+ /// + /// ``` + /// #![feature(portable_simd)] + /// # use core_simd::Simd; + /// let a = Simd::from_array([0, 1, 2, 3]); + /// let b = Simd::from_array([4, 5, 6, 7]); + /// let (x, y) = a.interleave(b); + /// assert_eq!(x.to_array(), [0, 4, 1, 5]); + /// assert_eq!(y.to_array(), [2, 6, 3, 7]); + /// ``` + #[inline] + pub fn interleave(self, other: Self) -> (Self, Self) { + const fn lo<const LANES: usize>() -> [Which; LANES] { + let mut idx = [Which::First(0); LANES]; + let mut i = 0; + while i < LANES { + let offset = i / 2; + idx[i] = if i % 2 == 0 { + Which::First(offset) + } else { + Which::Second(offset) + }; + i += 1; + } + idx + } + const fn hi<const LANES: usize>() -> [Which; LANES] { + let mut idx = [Which::First(0); LANES]; + let mut i = 0; + while i < LANES { + let offset = (LANES + i) / 2; + idx[i] = if i % 2 == 0 { + Which::First(offset) + } else { + Which::Second(offset) + }; + i += 1; + } + idx + } + + struct Lo; + struct Hi; + + impl<const LANES: usize> Swizzle2<LANES, LANES> for Lo { + const INDEX: [Which; LANES] = lo::<LANES>(); + } + + impl<const LANES: usize> Swizzle2<LANES, LANES> for Hi { + const INDEX: [Which; LANES] = hi::<LANES>(); + } + + (Lo::swizzle2(self, other), Hi::swizzle2(self, other)) + } + + /// Deinterleave two vectors. + /// + /// The first result takes every other lane of `self` and then `other`, starting with + /// the first lane. + /// + /// The second result takes every other lane of `self` and then `other`, starting with + /// the second lane. + /// + /// This particular permutation is efficient on many architectures. 
+ /// + /// ``` + /// #![feature(portable_simd)] + /// # use core_simd::Simd; + /// let a = Simd::from_array([0, 4, 1, 5]); + /// let b = Simd::from_array([2, 6, 3, 7]); + /// let (x, y) = a.deinterleave(b); + /// assert_eq!(x.to_array(), [0, 1, 2, 3]); + /// assert_eq!(y.to_array(), [4, 5, 6, 7]); + /// ``` + #[inline] + pub fn deinterleave(self, other: Self) -> (Self, Self) { + const fn even<const LANES: usize>() -> [Which; LANES] { + let mut idx = [Which::First(0); LANES]; + let mut i = 0; + while i < LANES / 2 { + idx[i] = Which::First(2 * i); + idx[i + LANES / 2] = Which::Second(2 * i); + i += 1; + } + idx + } + const fn odd<const LANES: usize>() -> [Which; LANES] { + let mut idx = [Which::First(0); LANES]; + let mut i = 0; + while i < LANES / 2 { + idx[i] = Which::First(2 * i + 1); + idx[i + LANES / 2] = Which::Second(2 * i + 1); + i += 1; + } + idx + } + + struct Even; + struct Odd; + + impl<const LANES: usize> Swizzle2<LANES, LANES> for Even { + const INDEX: [Which; LANES] = even::<LANES>(); + } + + impl<const LANES: usize> Swizzle2<LANES, LANES> for Odd { + const INDEX: [Which; LANES] = odd::<LANES>(); + } + + (Even::swizzle2(self, other), Odd::swizzle2(self, other)) + } +} diff --git a/crates/core_simd/tests/permute.rs b/crates/core_simd/tests/permute.rs deleted file mode 100644 index ea52e8f5ca7..00000000000 --- a/crates/core_simd/tests/permute.rs +++ /dev/null @@ -1,37 +0,0 @@ -#![feature(portable_simd)] - -use core_simd::Simd; - -#[cfg(target_arch = "wasm32")] -use wasm_bindgen_test::*; - -#[cfg(target_arch = "wasm32")] -wasm_bindgen_test_configure!(run_in_browser); - -#[test] -#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -fn simple_shuffle() { - let a = Simd::from_array([2, 4, 1, 9]); - let b = a; - assert_eq!(a.shuffle::<{ [3, 1, 4, 6] }>(b).to_array(), [9, 4, 2, 1]); -} - -#[test] -#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -fn reverse() { - let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]); - 
assert_eq!(a.reverse().to_array(), [7, 6, 5, 4, 3, 2, 1, 0]); -} - -#[test] -#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] -fn interleave() { - let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]); - let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]); - let (lo, hi) = a.interleave(b); - assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]); - assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]); - let (even, odd) = lo.deinterleave(hi); - assert_eq!(even, a); - assert_eq!(odd, b); -} diff --git a/crates/core_simd/tests/swizzle.rs b/crates/core_simd/tests/swizzle.rs new file mode 100644 index 00000000000..d4abc46b932 --- /dev/null +++ b/crates/core_simd/tests/swizzle.rs @@ -0,0 +1,62 @@ +#![feature(portable_simd)] +use core_simd::{Simd, Swizzle}; + +#[cfg(target_arch = "wasm32")] +use wasm_bindgen_test::*; + +#[cfg(target_arch = "wasm32")] +wasm_bindgen_test_configure!(run_in_browser); + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn swizzle() { + struct Index; + impl Swizzle<4, 4> for Index { + const INDEX: [usize; 4] = [2, 1, 3, 0]; + } + impl Swizzle<4, 2> for Index { + const INDEX: [usize; 2] = [1, 1]; + } + + let vector = Simd::from_array([2, 4, 1, 9]); + assert_eq!(Index::swizzle(vector).to_array(), [1, 4, 9, 2]); + assert_eq!(Index::swizzle(vector).to_array(), [4, 4]); +} + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn reverse() { + let a = Simd::from_array([1, 2, 3, 4]); + assert_eq!(a.reverse().to_array(), [4, 3, 2, 1]); +} + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn rotate() { + let a = Simd::from_array([1, 2, 3, 4]); + assert_eq!(a.rotate_left::<0>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_left::<1>().to_array(), [2, 3, 4, 1]); + assert_eq!(a.rotate_left::<2>().to_array(), [3, 4, 1, 2]); + assert_eq!(a.rotate_left::<3>().to_array(), [4, 1, 2, 3]); + assert_eq!(a.rotate_left::<4>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_left::<5>().to_array(), [2, 3, 
4, 1]); + assert_eq!(a.rotate_right::<0>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_right::<1>().to_array(), [4, 1, 2, 3]); + assert_eq!(a.rotate_right::<2>().to_array(), [3, 4, 1, 2]); + assert_eq!(a.rotate_right::<3>().to_array(), [2, 3, 4, 1]); + assert_eq!(a.rotate_right::<4>().to_array(), [1, 2, 3, 4]); + assert_eq!(a.rotate_right::<5>().to_array(), [4, 1, 2, 3]); +} + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn interleave() { + let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]); + let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]); + let (lo, hi) = a.interleave(b); + assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]); + assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]); + let (even, odd) = lo.deinterleave(hi); + assert_eq!(even, a); + assert_eq!(odd, b); +} |
