about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- crates/core_simd/src/array.rs 66
-rw-r--r-- crates/core_simd/src/intrinsics.rs 4
2 files changed, 69 insertions, 1 deletion
diff --git a/crates/core_simd/src/array.rs b/crates/core_simd/src/array.rs
index d2f944d1e53..36e1fb59675 100644
--- a/crates/core_simd/src/array.rs
+++ b/crates/core_simd/src/array.rs
@@ -1,4 +1,6 @@
+use crate::intrinsics;
 use crate::masks::*;
+use crate::vector::ptr::SimdConstPtr;
 use crate::vector::*;
 
 /// A representation of a vector as an "array" with indices, implementing
@@ -17,6 +19,70 @@ where
     /// Generates a SIMD vector with the same value in every lane.
     #[must_use]
     fn splat(val: Self::Scalar) -> Self;
+
+    /// SIMD gather: construct a SIMD vector by reading `slice[idxs[i]]` into lane `i`; the indices may be discontiguous.
+    /// Forwards to `gather_select` with `MaskSize::splat(true)`; a lane whose index is out of bounds instead takes the same lane of the "or" vector.
+    /// ```
+    /// # use core_simd::*;
+    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+    /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
+    ///
+    /// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Index 9 is out of bounds for 9 elements, so lane 0 takes -5 from `alt`.
+    /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15]));
+    /// ```
+    #[must_use]
+    #[inline]
+    fn gather_or(slice: &[Self::Scalar], idxs: SimdUsize<LANES>, or: Self) -> Self {
+        Self::gather_select(slice, MaskSize::splat(true), idxs, or)
+    }
+
+    /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+    /// Out-of-bounds indices instead yield `Self::Scalar::default()` in that lane (0 for the `i32` example below).
+    /// ```
+    /// # use core_simd::*;
+    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+    ///
+    /// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Index 9 is out of bounds, so lane 0 is the default, 0.
+    /// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15]));
+    /// ```
+    #[must_use]
+    #[inline]
+    fn gather_or_default(slice: &[Self::Scalar], idxs: SimdUsize<LANES>) -> Self
+    where
+        Self::Scalar: Default,
+    {
+        Self::gather_or(slice, idxs, Self::splat(Self::Scalar::default()))
+    }
+
+    /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+    /// A lane whose index is out of bounds, or whose `mask` lane is `false`, instead takes the same lane of the "or" vector.
+    /// ```
+    /// # use core_simd::*;
+    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+    /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
+    /// let mask = MaskSize::from_array([true, true, true, false]); // `false` disables the last lane.
+    ///
+    /// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Lane 0 is out of bounds and lane 3 is masked off; both come from `alt`.
+    /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2]));
+    /// ```
+    #[must_use]
+    #[inline]
+    fn gather_select(
+        slice: &[Self::Scalar],
+        mask: MaskSize<LANES>,
+        idxs: SimdUsize<LANES>,
+        or: Self,
+    ) -> Self {
+        let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int();
+        let base_ptr = SimdConstPtr::splat(slice.as_ptr());
+        // One pointer per lane: the slice's base pointer advanced by that lane's index (may point out of bounds for lanes disabled above).
+        let ptrs = base_ptr.wrapping_add(idxs);
+        // SAFETY: `mask` was ANDed with `idxs < slice.len()` above, so only in-bounds lanes stay enabled; this assumes `simd_gather` never reads through a disabled lane (confirm against the intrinsic's contract).
+        unsafe { intrinsics::simd_gather(or, ptrs, mask) }
+    }
 }
 
 macro_rules! impl_simdarray_for {
diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index 7adf4c24e10..e69696e515b 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -45,7 +45,7 @@ extern "platform-intrinsic" {
 
     /// fabs
     pub(crate) fn simd_fabs<T>(x: T) -> T;
-    
+
     /// fsqrt
     pub(crate) fn simd_fsqrt<T>(x: T) -> T;
 
@@ -63,6 +63,8 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
     pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
 
+    pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T; // gather; array.rs passes (fallback vector, per-lane pointers, integer lane mask)
+
     // {s,u}add.sat
     pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;