about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jubilee Young <workingjubilee@gmail.com> 2021-06-21 18:04:39 -0700
committer Jubilee Young <workingjubilee@gmail.com> 2021-06-21 18:18:50 -0700
commit 81ceda8c5b93df524fca5f2a14f93d044c0dc6da (patch)
tree 89714658f0395e99d8166ea0c8e9a6595c764c77
parent 128b6f5e22db1aad085627322f1deb7fb71b07ae (diff)
download rust-81ceda8c5b93df524fca5f2a14f93d044c0dc6da.tar.gz
rust-81ceda8c5b93df524fca5f2a14f93d044c0dc6da.zip
Add SimdArray::scatter{,_select}
-rw-r--r-- crates/core_simd/src/array.rs 60
-rw-r--r-- crates/core_simd/src/intrinsics.rs 1
2 files changed, 60 insertions, 1 deletion
diff --git a/crates/core_simd/src/array.rs b/crates/core_simd/src/array.rs
index 36e1fb59675..202a44f4118 100644
--- a/crates/core_simd/src/array.rs
+++ b/crates/core_simd/src/array.rs
@@ -1,6 +1,6 @@
 use crate::intrinsics;
 use crate::masks::*;
-use crate::vector::ptr::SimdConstPtr;
+use crate::vector::ptr::{SimdConstPtr, SimdMutPtr};
 use crate::vector::*;
 
 /// A representation of a vector as an "array" with indices, implementing
@@ -83,6 +83,64 @@ where
         // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
         unsafe { intrinsics::simd_gather(or, ptrs, mask) }
     }
+
+    /// SIMD scatter: writes each lane of `self` into `slice` at the index held in the matching lane of `idxs`.
+    /// Every lane is enabled; a lane whose index is out of bounds (`>= slice.len()`) is skipped, not written.
+    /// ```
+    /// # use core_simd::*;
+    /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+    /// let vals = SimdI32::from_array([-5, -4, -3, -2]);
+    ///
+    /// vals.scatter(&mut vec, idxs);
+    /// assert_eq!(vec, vec![-3, 11, 12, -4, 14, -2, 16, 17, 18]);
+    /// ```
+    #[inline]
+    fn scatter(self, slice: &mut [Self::Scalar], idxs: SimdUsize<LANES>) {
+        self.scatter_select(slice, MaskSize::splat(true), idxs)
+    }
+
+    /// SIMD masked scatter: writes each lane of `self` into `slice` at the index held in the matching lane of `idxs`.
+    /// A lane is skipped when its `mask` lane is false or its index is out of bounds (`>= slice.len()`).
+    /// ```
+    /// # use core_simd::*;
+    /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+    /// let vals = SimdI32::from_array([-5, -4, -3, -2]);
+    /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
+    ///
+    /// vals.scatter_select(&mut vec, mask, idxs);
+    /// assert_eq!(vec, vec![-3, 11, 12, -4, 14, 15, 16, 17, 18]);
+    /// ```
+    #[inline]
+    fn scatter_select(
+        self,
+        slice: &mut [Self::Scalar],
+        mask: MaskSize<LANES>,
+        idxs: SimdUsize<LANES>,
+    ) {
+        // Fold the bounds check into the mask *before* deriving any raw pointer from `slice`.
+        let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int();
+        // SAFETY: This block works with `*mut T` pointers derived from `&mut 'a [T]`,
+        // which is delicate under Rust's borrowing model (as understood circa 2021):
+        // `&mut 'a [T]` asserts uniqueness, so re-deriving `&'a [T]` invalidates live `*mut T`s!
+        // Although most calls in this block are safe methods, their order must stay as written
+        // so that the raw pointers are never invalidated while they are still live.
+        // Therefore everything the block needs must already be computed before entering it:
+        // 0. idxs: the indices to write to, also used to construct the mask.
+        // 1. mask: depends on a shared borrow of the slice (for its length) and on idxs.
+        // 2. self: the actual values to scatter.
+        // 3. slice: the `&mut [T]` that becomes our base pointer.
+        unsafe {
+            // Entering the ☢️ `*mut T` zone: do not borrow `slice` again until the write is done.
+            let base_ptr = SimdMutPtr::splat(slice.as_mut_ptr());
+            // Per-lane pointer arithmetic: base pointer offset by each index (out-of-bounds lanes included).
+            let ptrs = base_ptr.wrapping_add(idxs);
+            // The mask disables every out-of-bounds lane, so only in-bounds ptrs are meant to be written.
+            intrinsics::simd_scatter(self, ptrs, mask)
+            // Leaving the ☢️ `*mut T` zone.
+        }
+    }
 }
 
 macro_rules! impl_simdarray_for {
diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index e69696e515b..ee9726e0fec 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -64,6 +64,7 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
 
     pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
+    pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
 
     // {s,u}add.sat
     pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;