Merge pull request #142 from rust-lang/feature/traits

Combine LanesAtMost32 and SimdArray into a single trait "Vector". Attempts to fix some unresolved questions in #139 regarding `SimdArray` having a generic parameter. In particular, the generic parameter made `SimdArray` unsuitable as a replacement for `LanesAtMost32`. Additionally, it made the trait impossible to use in a context where you otherwise don't know the lane count, e.g. `impl Vector`. An unfortunate side effect of this change is that scatter/gather no longer work in the trait (nor does anything else that references the lane count in a type); supporting that requires the super-unstable `const_evaluatable_checked` feature. I also threw in the change from `as_slice` to `as_array`, as discussed on Zulip. Fixes #51.
author: Jubilee <46493976+workingjubilee@users.noreply.github.com> 2021-07-24 16:01:57 -0700
committer: GitHub <noreply@github.com> 2021-07-24 16:01:57 -0700
commit: 82e3405efe5ffd2bc214d32b581d5cfc1157eb8d (patch)
tree: 4051bef1cbb11b4c9c53acc0ea26eefcbd6503b2
parent: 732b7edfab46b33e3861172eb867b139a9425574 (diff)
parent: 97c25dd7465f4db60c013d7688b809a7da5388a6 (diff)
download: rust-82e3405efe5ffd2bc214d32b581d5cfc1157eb8d.tar.gz
rust-82e3405efe5ffd2bc214d32b581d5cfc1157eb8d.zip
24 files changed, 637 insertions, 820 deletions
diff --git a/crates/core_simd/src/array.rs b/crates/core_simd/src/array.rs
deleted file mode 100644
index 25c53097beb..00000000000
--- a/crates/core_simd/src/array.rs
+++ /dev/null
@@ -1,253 +0,0 @@
-use crate::intrinsics;
-use crate::masks::*;
-use crate::vector::ptr::{SimdConstPtr, SimdMutPtr};
-use crate::vector::*;
-
-/// A representation of a vector as an "array" with indices, implementing
-/// operations applicable to any vector type based solely on "having lanes",
-/// and describing relationships between vector and scalar types.
-pub trait SimdArray<const LANES: usize>: crate::LanesAtMost32
-where
-    SimdUsize<LANES>: crate::LanesAtMost32,
-    SimdIsize<LANES>: crate::LanesAtMost32,
-    MaskSize<LANES>: crate::Mask,
-    Self: Sized,
-{
-    /// The scalar type in every lane of this vector type.
-    type Scalar: Copy + Sized;
-    /// The number of lanes for this vector.
-    const LANES: usize = LANES;
-
-    /// Generates a SIMD vector with the same value in every lane.
-    #[must_use]
-    fn splat(val: Self::Scalar) -> Self;
-
-    /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
-    /// If an index is out of bounds, that lane instead selects the value from the "or" vector.
-    /// ```
-    /// # #![feature(portable_simd)]
-    /// # use core_simd::*;
-    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
-    /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
-    ///
-    /// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
-    /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15]));
-    /// ```
-    #[must_use]
-    #[inline]
-    fn gather_or(slice: &[Self::Scalar], idxs: SimdUsize<LANES>, or: Self) -> Self {
-        Self::gather_select(slice, MaskSize::splat(true), idxs, or)
-    }
-
-    /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
-    /// Out-of-bounds indices instead use the default value for that lane (0).
-    /// ```
-    /// # #![feature(portable_simd)]
-    /// # use core_simd::*;
-    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
-    ///
-    /// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
-    /// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15]));
-    /// ```
-    #[must_use]
-    #[inline]
-    fn gather_or_default(slice: &[Self::Scalar], idxs: SimdUsize<LANES>) -> Self
-    where
-        Self::Scalar: Default,
-    {
-        Self::gather_or(slice, idxs, Self::splat(Self::Scalar::default()))
-    }
-
-    /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
-    /// Out-of-bounds or masked indices instead select the value from the "or" vector.
-    /// ```
-    /// # #![feature(portable_simd)]
-    /// # use core_simd::*;
-    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
-    /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
-    /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
-    ///
-    /// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds.
-    /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2]));
-    /// ```
-    #[must_use]
-    #[inline]
-    fn gather_select(
-        slice: &[Self::Scalar],
-        mask: MaskSize<LANES>,
-        idxs: SimdUsize<LANES>,
-        or: Self,
-    ) -> Self {
-        let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int();
-        let base_ptr = SimdConstPtr::splat(slice.as_ptr());
-        // Ferris forgive me, I have done pointer arithmetic here.
-        let ptrs = base_ptr.wrapping_add(idxs);
-        // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
-        unsafe { intrinsics::simd_gather(or, ptrs, mask) }
-    }
-
-    /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
-    /// Out-of-bounds indices are not written.
-    /// `scatter` writes "in order", so if an index receives two writes, only the last is guaranteed.
-    /// ```
-    /// # #![feature(portable_simd)]
-    /// # use core_simd::*;
-    /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
-    /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
-    ///
-    /// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
-    /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
-    /// ```
-    #[inline]
-    fn scatter(self, slice: &mut [Self::Scalar], idxs: SimdUsize<LANES>) {
-        self.scatter_select(slice, MaskSize::splat(true), idxs)
-    }
-
-    /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
-    /// Out-of-bounds or masked indices are not written.
-    /// `scatter_select` writes "in order", so if an index receives two writes, only the last is guaranteed.
-    /// ```
-    /// # #![feature(portable_simd)]
-    /// # use core_simd::*;
-    /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
-    /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
-    /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
-    /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
-    ///
-    /// vals.scatter_select(&mut vec, mask, idxs); // index 0's second write is masked, thus omitted.
-    /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
-    /// ```
-    #[inline]
-    fn scatter_select(
-        self,
-        slice: &mut [Self::Scalar],
-        mask: MaskSize<LANES>,
-        idxs: SimdUsize<LANES>,
-    ) {
-        // We must construct our scatter mask before we derive a pointer!
-        let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int();
-        // SAFETY: This block works with *mut T derived from &mut 'a [T],
-        // which means it is delicate in Rust's borrowing model, circa 2021:
-        // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
-        // Even though this block is largely safe methods, it must be almost exactly this way
-        // to prevent invalidating the raw ptrs while they're live.
-        // Thus, entering this block requires all values to use being already ready:
-        // 0. idxs we want to write to, which are used to construct the mask.
-        // 1. mask, which depends on an initial &'a [T] and the idxs.
-        // 2. actual values to scatter (self).
-        // 3. &mut [T] which will become our base ptr.
-        unsafe {
-            // Now Entering ☢️ *mut T Zone
-            let base_ptr = SimdMutPtr::splat(slice.as_mut_ptr());
-            // Ferris forgive me, I have done pointer arithmetic here.
-            let ptrs = base_ptr.wrapping_add(idxs);
-            // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
-            intrinsics::simd_scatter(self, ptrs, mask)
-            // Cleared ☢️ *mut T Zone
-        }
-    }
-}
-
-macro_rules! impl_simdarray_for {
-    ($simd:ident {type Scalar = $scalar:ident;}) => {
-        impl<const LANES: usize> SimdArray<LANES> for $simd<LANES>
-            where SimdUsize<LANES>: crate::LanesAtMost32,
-            SimdIsize<LANES>: crate::LanesAtMost32,
-            MaskSize<LANES>: crate::Mask,
-            Self: crate::LanesAtMost32,
-        {
-            type Scalar = $scalar;
-
-            #[must_use]
-            #[inline]
-            fn splat(val: Self::Scalar) -> Self {
-                [val; LANES].into()
-            }
-        }
-    };
-
-    ($simd:ident $impl:tt) => {
-        impl<const LANES: usize> SimdArray<LANES> for $simd<LANES>
-            where SimdUsize<LANES>: crate::LanesAtMost32,
-            SimdIsize<LANES>: crate::LanesAtMost32,
-            MaskSize<LANES>: crate::Mask,
-            Self: crate::LanesAtMost32,
-        $impl
-    }
-}
-
-impl_simdarray_for! {
-    SimdUsize {
-        type Scalar = usize;
-    }
-}
-
-impl_simdarray_for! {
-    SimdIsize {
-        type Scalar = isize;
-    }
-}
-
-impl_simdarray_for! {
-    SimdI8 {
-        type Scalar = i8;
-    }
-}
-
-impl_simdarray_for! {
-    SimdI16 {
-        type Scalar = i16;
-    }
-}
-
-impl_simdarray_for! {
-    SimdI32 {
-        type Scalar = i32;
-    }
-}
-
-impl_simdarray_for! {
-    SimdI64 {
-        type Scalar = i64;
-    }
-}
-
-impl_simdarray_for! {
-    SimdU8 {
-        type Scalar = u8;
-    }
-}
-
-impl_simdarray_for! {
-    SimdU16 {
-        type Scalar = u16;
-    }
-}
-
-impl_simdarray_for! {
-    SimdU32 {
-        type Scalar = u32;
-    }
-}
-
-impl_simdarray_for! {
-    SimdU64 {
-        type Scalar = u64;
-    }
-}
-
-impl_simdarray_for! {
-    SimdF32 {
-        type Scalar = f32;
-    }
-}
-
-impl_simdarray_for! {
-    SimdF64 {
-        type Scalar = f64;
-    }
-}
diff --git a/crates/core_simd/src/comparisons.rs b/crates/core_simd/src/comparisons.rs
index e8d11406c09..c5e9be9015f 100644
--- a/crates/core_simd/src/comparisons.rs
+++ b/crates/core_simd/src/comparisons.rs
@@ -1,13 +1,11 @@
-use crate::LanesAtMost32;
+use crate::{LaneCount, SupportedLaneCount};
 
 macro_rules! implement_mask_ops {
     { $($vector:ident => $mask:ident ($inner_ty:ident),)* } => {
         $(
             impl<const LANES: usize> crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
-                crate::$inner_ty<LANES>: LanesAtMost32,
-                crate::$mask<LANES>: crate::Mask,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 /// Test if each lane is equal to the corresponding lane in `other`.
                 #[inline]
diff --git a/crates/core_simd/src/first.rs b/crates/core_simd/src/first.rs
deleted file mode 100644
index 50602829d48..00000000000
--- a/crates/core_simd/src/first.rs
+++ /dev/null
@@ -1,124 +0,0 @@
-/// Implements common traits on the specified vector `$name`, holding multiple `$lanes` of `$type`.
-macro_rules! impl_vector {
-    { $name:ident, $type:ty } => {
-        impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
-            /// Construct a SIMD vector by setting all lanes to the given value.
-            pub const fn splat(value: $type) -> Self {
-                Self([value; LANES])
-            }
-
-            /// Returns a slice containing the entire SIMD vector.
-            pub const fn as_slice(&self) -> &[$type] {
-                &self.0
-            }
-
-            /// Returns a mutable slice containing the entire SIMD vector.
-            pub fn as_mut_slice(&mut self) -> &mut [$type] {
-                &mut self.0
-            }
-
-            /// Converts an array to a SIMD vector.
-            pub const fn from_array(array: [$type; LANES]) -> Self {
-                Self(array)
-            }
-
-            /// Converts a SIMD vector to an array.
-            pub const fn to_array(self) -> [$type; LANES] {
-                // workaround for rust-lang/rust#80108
-                // TODO fix this
-                #[cfg(target_arch = "wasm32")]
-                {
-                    let mut arr = [self.0[0]; LANES];
-                    let mut i = 0;
-                    while i < LANES {
-                        arr[i] = self.0[i];
-                        i += 1;
-                    }
-                    arr
-                }
-
-                #[cfg(not(target_arch = "wasm32"))]
-                {
-                    self.0
-                }
-            }
-        }
-
-        impl<const LANES: usize> Copy for $name<LANES> where Self: crate::LanesAtMost32 {}
-
-        impl<const LANES: usize> Clone for $name<LANES> where Self: crate::LanesAtMost32 {
-            #[inline]
-            fn clone(&self) -> Self {
-                *self
-            }
-        }
-
-        impl<const LANES: usize> Default for $name<LANES> where Self: crate::LanesAtMost32 {
-            #[inline]
-            fn default() -> Self {
-                Self::splat(<$type>::default())
-            }
-        }
-
-        impl<const LANES: usize> PartialEq for $name<LANES> where Self: crate::LanesAtMost32 {
-            #[inline]
-            fn eq(&self, other: &Self) -> bool {
-                // TODO use SIMD equality
-                self.to_array() == other.to_array()
-            }
-        }
-
-        impl<const LANES: usize> PartialOrd for $name<LANES> where Self: crate::LanesAtMost32 {
-            #[inline]
-            fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
-                // TODO use SIMD equalitya
-                self.to_array().partial_cmp(other.as_ref())
-            }
-        }
-
-        // array references
-        impl<const LANES: usize> AsRef<[$type; LANES]> for $name<LANES> where Self: crate::LanesAtMost32 {
-            #[inline]
-            fn as_ref(&self) -> &[$type; LANES] {
-                &self.0
-            }
-        }
-
-        impl<const LANES: usize> AsMut<[$type; LANES]> for $name<LANES> where Self: crate::LanesAtMost32 {
-            #[inline]
-            fn as_mut(&mut self) -> &mut [$type; LANES] {
-                &mut self.0
-            }
-        }
-
-        // slice references
-        impl<const LANES: usize> AsRef<[$type]> for $name<LANES> where Self: crate::LanesAtMost32 {
-            #[inline]
-            fn as_ref(&self) -> &[$type] {
-                &self.0
-            }
-        }
-
-        impl<const LANES: usize> AsMut<[$type]> for $name<LANES> where Self: crate::LanesAtMost32 {
-            #[inline]
-            fn as_mut(&mut self) -> &mut [$type] {
-                &mut self.0
-            }
-        }
-
-        // vector/array conversion
-        impl<const LANES: usize> From<[$type; LANES]> for $name<LANES> where Self: crate::LanesAtMost32 {
-            fn from(array: [$type; LANES]) -> Self {
-                Self(array)
-            }
-        }
-
-        impl <const LANES: usize> From<$name<LANES>> for [$type; LANES] where $name<LANES>: crate::LanesAtMost32 {
-            fn from(vector: $name<LANES>) -> Self {
-                vector.to_array()
-            }
-        }
-
-        impl_shuffle_2pow_lanes!{ $name }
-    }
-}
diff --git a/crates/core_simd/src/fmt.rs b/crates/core_simd/src/fmt.rs
index 1d5010843eb..78ae5ce3fce 100644
--- a/crates/core_simd/src/fmt.rs
+++ b/crates/core_simd/src/fmt.rs
@@ -35,7 +35,7 @@ macro_rules! impl_fmt_trait {
             $( // repeat trait
                 impl<const LANES: usize> core::fmt::$trait for crate::$type<LANES>
                 where
-                    Self: crate::LanesAtMost32,
+                    crate::LaneCount<LANES>: crate::SupportedLaneCount,
                 {
                     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                         $format(self.as_ref(), f)
diff --git a/crates/core_simd/src/iter.rs b/crates/core_simd/src/iter.rs
index c1c4c645db6..0020ea5f201 100644
--- a/crates/core_simd/src/iter.rs
+++ b/crates/core_simd/src/iter.rs
@@ -1,8 +1,10 @@
+use crate::{LaneCount, SupportedLaneCount};
+
 macro_rules! impl_traits {
     { $type:ident } => {
         impl<const LANES: usize> core::iter::Sum<Self> for crate::$type<LANES>
         where
-            Self: crate::LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             fn sum<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
                 iter.fold(Default::default(), core::ops::Add::add)
@@ -11,7 +13,7 @@ macro_rules! impl_traits {
 
         impl<const LANES: usize> core::iter::Product<Self> for crate::$type<LANES>
         where
-            Self: crate::LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             fn product<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
                 iter.fold(Default::default(), core::ops::Mul::mul)
@@ -20,7 +22,7 @@ macro_rules! impl_traits {
 
         impl<'a, const LANES: usize> core::iter::Sum<&'a Self> for crate::$type<LANES>
         where
-            Self: crate::LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             fn sum<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
                 iter.fold(Default::default(), core::ops::Add::add)
@@ -29,7 +31,7 @@ macro_rules! impl_traits {
 
         impl<'a, const LANES: usize> core::iter::Product<&'a Self> for crate::$type<LANES>
         where
-            Self: crate::LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             fn product<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
                 iter.fold(Default::default(), core::ops::Mul::mul)
diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs
new file mode 100644
index 00000000000..8fe204dff98
--- /dev/null
+++ b/crates/core_simd/src/lane_count.rs
@@ -0,0 +1,43 @@
+mod sealed {
+    pub trait Sealed {}
+}
+use sealed::Sealed;
+
+/// A type representing a vector lane count.
+pub struct LaneCount<const LANES: usize>;
+
+/// Helper trait for vector lane counts.
+pub trait SupportedLaneCount: Sealed {
+    /// The bitmask representation of a mask.
+    type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
+
+    #[doc(hidden)]
+    type IntBitMask;
+}
+
+impl<const LANES: usize> Sealed for LaneCount<LANES> {}
+
+impl SupportedLaneCount for LaneCount<1> {
+    type BitMask = [u8; 1];
+    type IntBitMask = u8;
+}
+impl SupportedLaneCount for LaneCount<2> {
+    type BitMask = [u8; 1];
+    type IntBitMask = u8;
+}
+impl SupportedLaneCount for LaneCount<4> {
+    type BitMask = [u8; 1];
+    type IntBitMask = u8;
+}
+impl SupportedLaneCount for LaneCount<8> {
+    type BitMask = [u8; 1];
+    type IntBitMask = u8;
+}
+impl SupportedLaneCount for LaneCount<16> {
+    type BitMask = [u8; 2];
+    type IntBitMask = u16;
+}
+impl SupportedLaneCount for LaneCount<32> {
+    type BitMask = [u8; 4];
+    type IntBitMask = u32;
+}
diff --git a/crates/core_simd/src/lanes_at_most_32.rs b/crates/core_simd/src/lanes_at_most_32.rs
deleted file mode 100644
index 2d84b1306ea..00000000000
--- a/crates/core_simd/src/lanes_at_most_32.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-/// Implemented for vectors that are supported by the implementation.
-pub trait LanesAtMost32: sealed::Sealed {
-    #[doc(hidden)]
-    type BitMask: Into<u64>;
-}
-
-mod sealed {
-    pub trait Sealed {}
-}
-
-macro_rules! impl_for {
-    { $name:ident } => {
-        impl<const LANES: usize> sealed::Sealed for $name<LANES>
-        where
-            $name<LANES>: LanesAtMost32,
-        {}
-
-        impl LanesAtMost32 for $name<1> {
-            type BitMask = u8;
-        }
-        impl LanesAtMost32 for $name<2> {
-            type BitMask = u8;
-        }
-        impl LanesAtMost32 for $name<4> {
-            type BitMask = u8;
-        }
-        impl LanesAtMost32 for $name<8> {
-            type BitMask = u8;
-        }
-        impl LanesAtMost32 for $name<16> {
-            type BitMask = u16;
-        }
-        impl LanesAtMost32 for $name<32> {
-            type BitMask = u32;
-        }
-    }
-}
-
-use crate::*;
-
-impl_for! { SimdU8 }
-impl_for! { SimdU16 }
-impl_for! { SimdU32 }
-impl_for! { SimdU64 }
-impl_for! { SimdUsize }
-
-impl_for! { SimdI8 }
-impl_for! { SimdI16 }
-impl_for! { SimdI32 }
-impl_for! { SimdI64 }
-impl_for! { SimdIsize }
-
-impl_for! { SimdF32 }
-impl_for! { SimdF64 }
diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index e48f8062d2c..d8149efe9c7 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -13,8 +13,6 @@
 //! Portable SIMD module.
 
 #[macro_use]
-mod first;
-#[macro_use]
 mod permute;
 #[macro_use]
 mod reduction;
@@ -29,20 +27,16 @@ mod comparisons;
 mod fmt;
 mod intrinsics;
 mod iter;
+mod math;
 mod ops;
 mod round;
 mod vendor;
 
-mod math;
-
-mod lanes_at_most_32;
-pub use lanes_at_most_32::LanesAtMost32;
+mod lane_count;
+pub use lane_count::*;
 
 mod masks;
 pub use masks::*;
 
 mod vector;
 pub use vector::*;
-
-mod array;
-pub use array::SimdArray;
diff --git a/crates/core_simd/src/masks/mod.rs b/crates/core_simd/src/masks.rs
index 1d6b2e45224..d3338a6d366 100644
--- a/crates/core_simd/src/masks/mod.rs
+++ b/crates/core_simd/src/masks.rs
@@ -4,15 +4,15 @@
 
 #[cfg_attr(
     not(all(target_arch = "x86_64", target_feature = "avx512f")),
-    path = "full_masks.rs"
+    path = "masks/full_masks.rs"
 )]
 #[cfg_attr(
     all(target_arch = "x86_64", target_feature = "avx512f"),
-    path = "bitmask.rs"
+    path = "masks/bitmask.rs"
 )]
 mod mask_impl;
 
-use crate::{LanesAtMost32, SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
+use crate::{SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
 
 mod sealed {
     pub trait Sealed {}
@@ -20,12 +20,12 @@ mod sealed {
 
 /// Helper trait for mask types.
 pub trait Mask: sealed::Sealed {
-    /// The bitmask representation of a mask.
-    type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
+    /// The number of lanes for this mask.
+    const LANES: usize;
 
-    // TODO remove this when rustc intrinsics are more flexible
-    #[doc(hidden)]
-    type IntBitMask;
+    /// Generates a mask with the same value in every lane.
+    #[must_use]
+    fn splat(val: bool) -> Self;
 }
 
 macro_rules! define_opaque_mask {
@@ -38,45 +38,30 @@ macro_rules! define_opaque_mask {
         #[allow(non_camel_case_types)]
         pub struct $name<const LANES: usize>($inner_ty)
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask;
+            crate::LaneCount<LANES>: crate::SupportedLaneCount;
 
         impl<const LANES: usize> sealed::Sealed for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {}
-        impl Mask for $name<1> {
-            type BitMask = [u8; 1];
-            type IntBitMask = u8;
-        }
-        impl Mask for $name<2> {
-            type BitMask = [u8; 1];
-            type IntBitMask = u8;
-        }
-        impl Mask for $name<4> {
-            type BitMask = [u8; 1];
-            type IntBitMask = u8;
-        }
-        impl Mask for $name<8> {
-            type BitMask = [u8; 1];
-            type IntBitMask = u8;
-        }
-        impl Mask for $name<16> {
-            type BitMask = [u8; 2];
-            type IntBitMask = u16;
-        }
-        impl Mask for $name<32> {
-            type BitMask = [u8; 4];
-            type IntBitMask = u32;
+
+        impl<const LANES: usize> Mask for $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {
+            const LANES: usize = LANES;
+
+            #[inline]
+            fn splat(value: bool) -> Self {
+                Self::splat(value)
+            }
         }
 
         impl_opaque_mask_reductions! { $name, $bits_ty }
 
         impl<const LANES: usize> $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Construct a mask by setting all lanes to the given value.
             pub fn splat(value: bool) -> Self {
@@ -175,21 +160,20 @@ macro_rules! define_opaque_mask {
             }
 
             /// Convert this mask to a bitmask, with one bit set per lane.
-            pub fn to_bitmask(self) -> <Self as Mask>::BitMask {
-                self.0.to_bitmask::<Self>()
+            pub fn to_bitmask(self) -> <crate::LaneCount<LANES> as crate::SupportedLaneCount>::BitMask {
+                self.0.to_bitmask()
             }
 
             /// Convert a bitmask to a mask.
-            pub fn from_bitmask(bitmask: <Self as Mask>::BitMask) -> Self {
-                Self(<$inner_ty>::from_bitmask::<Self>(bitmask))
+            pub fn from_bitmask(bitmask: <crate::LaneCount<LANES> as crate::SupportedLaneCount>::BitMask) -> Self {
+                Self(<$inner_ty>::from_bitmask(bitmask))
             }
         }
 
         // vector/array conversion
         impl<const LANES: usize> From<[bool; LANES]> for $name<LANES>
         where
-            $bits_ty<LANES>: crate::LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             fn from(array: [bool; LANES]) -> Self {
                 Self::from_array(array)
@@ -198,8 +182,7 @@ macro_rules! define_opaque_mask {
 
         impl <const LANES: usize> From<$name<LANES>> for [bool; LANES]
         where
-            $bits_ty<LANES>: crate::LanesAtMost32,
-            $name<LANES>: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             fn from(vector: $name<LANES>) -> Self {
                 vector.to_array()
@@ -208,14 +191,12 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> Copy for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {}
 
         impl<const LANES: usize> Clone for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn clone(&self) -> Self {
@@ -225,8 +206,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> Default for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn default() -> Self {
@@ -236,8 +216,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> PartialEq for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn eq(&self, other: &Self) -> bool {
@@ -247,8 +226,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> PartialOrd for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
@@ -258,8 +236,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::fmt::Debug for $name<LANES>
         where
-            $bits_ty<LANES>: crate::LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 f.debug_list()
@@ -270,8 +247,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
@@ -282,8 +258,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
@@ -294,8 +269,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            $name<LANES>: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -306,8 +280,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOr for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
@@ -318,8 +291,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
@@ -330,8 +302,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            $name<LANES>: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -342,8 +313,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXor for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
@@ -354,8 +324,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
@@ -366,8 +335,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            $name<LANES>: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -378,8 +346,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::Not for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -390,8 +357,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: Self) {
@@ -401,8 +367,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: bool) {
@@ -412,8 +377,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: Self) {
@@ -423,8 +387,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: bool) {
@@ -434,8 +397,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: Self) {
@@ -445,8 +407,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: bool) {
@@ -460,7 +421,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` 8-bit elements.
     ///
     /// The layout of this type is unspecified.
-    struct Mask8<const LANES: usize>(mask_impl::Mask8<Self, LANES>);
+    struct Mask8<const LANES: usize>(mask_impl::Mask8<LANES>);
     @bits SimdI8
 }
 
@@ -468,7 +429,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` 16-bit elements.
     ///
     /// The layout of this type is unspecified.
-    struct Mask16<const LANES: usize>(mask_impl::Mask16<Self, LANES>);
+    struct Mask16<const LANES: usize>(mask_impl::Mask16<LANES>);
     @bits SimdI16
 }
 
@@ -476,7 +437,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` 32-bit elements.
     ///
     /// The layout of this type is unspecified.
-    struct Mask32<const LANES: usize>(mask_impl::Mask32<Self, LANES>);
+    struct Mask32<const LANES: usize>(mask_impl::Mask32<LANES>);
     @bits SimdI32
 }
 
@@ -484,7 +445,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` 64-bit elements.
     ///
     /// The layout of this type is unspecified.
-    struct Mask64<const LANES: usize>(mask_impl::Mask64<Self, LANES>);
+    struct Mask64<const LANES: usize>(mask_impl::Mask64<LANES>);
     @bits SimdI64
 }
 
@@ -492,7 +453,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` pointer-width elements.
     ///
     /// The layout of this type is unspecified.
-    struct MaskSize<const LANES: usize>(mask_impl::MaskSize<Self, LANES>);
+    struct MaskSize<const LANES: usize>(mask_impl::MaskSize<LANES>);
     @bits SimdIsize
 }
 
@@ -555,10 +516,7 @@ macro_rules! impl_from {
         $(
         impl<const LANES: usize> From<$from<LANES>> for $to<LANES>
         where
-            crate::$from_inner<LANES>: crate::LanesAtMost32,
-            crate::$to_inner<LANES>: crate::LanesAtMost32,
-            $from<LANES>: Mask,
-            Self: Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             fn from(value: $from<LANES>) -> Self {
                 Self(value.0.into())
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index fab136d2b24..b6897728988 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -1,50 +1,81 @@
-use crate::Mask;
-use core::marker::PhantomData;
+use crate::{LaneCount, SupportedLaneCount};
 
 /// Helper trait for limiting int conversion types
 pub trait ConvertToInt {}
-impl<const LANES: usize> ConvertToInt for crate::SimdI8<LANES> where Self: crate::LanesAtMost32 {}
-impl<const LANES: usize> ConvertToInt for crate::SimdI16<LANES> where Self: crate::LanesAtMost32 {}
-impl<const LANES: usize> ConvertToInt for crate::SimdI32<LANES> where Self: crate::LanesAtMost32 {}
-impl<const LANES: usize> ConvertToInt for crate::SimdI64<LANES> where Self: crate::LanesAtMost32 {}
-impl<const LANES: usize> ConvertToInt for crate::SimdIsize<LANES> where Self: crate::LanesAtMost32 {}
+impl<const LANES: usize> ConvertToInt for crate::SimdI8<LANES> where
+    LaneCount<LANES>: SupportedLaneCount
+{
+}
+impl<const LANES: usize> ConvertToInt for crate::SimdI16<LANES> where
+    LaneCount<LANES>: SupportedLaneCount
+{
+}
+impl<const LANES: usize> ConvertToInt for crate::SimdI32<LANES> where
+    LaneCount<LANES>: SupportedLaneCount
+{
+}
+impl<const LANES: usize> ConvertToInt for crate::SimdI64<LANES> where
+    LaneCount<LANES>: SupportedLaneCount
+{
+}
+impl<const LANES: usize> ConvertToInt for crate::SimdIsize<LANES> where
+    LaneCount<LANES>: SupportedLaneCount
+{
+}
 
 /// A mask where each lane is represented by a single bit.
 #[repr(transparent)]
-pub struct BitMask<T: Mask, const LANES: usize>(T::BitMask, PhantomData<[(); LANES]>);
+pub struct BitMask<const LANES: usize>(<LaneCount<LANES> as SupportedLaneCount>::BitMask)
+where
+    LaneCount<LANES>: SupportedLaneCount;
 
-impl<T: Mask, const LANES: usize> Copy for BitMask<T, LANES> {}
+impl<const LANES: usize> Copy for BitMask<LANES> where LaneCount<LANES>: SupportedLaneCount {}
 
-impl<T: Mask, const LANES: usize> Clone for BitMask<T, LANES> {
+impl<const LANES: usize> Clone for BitMask<LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+{
     fn clone(&self) -> Self {
         *self
     }
 }
 
-impl<T: Mask, const LANES: usize> PartialEq for BitMask<T, LANES> {
+impl<const LANES: usize> PartialEq for BitMask<LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+{
     fn eq(&self, other: &Self) -> bool {
         self.0.as_ref() == other.0.as_ref()
     }
 }
 
-impl<T: Mask, const LANES: usize> PartialOrd for BitMask<T, LANES> {
+impl<const LANES: usize> PartialOrd for BitMask<LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+{
     fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
         self.0.as_ref().partial_cmp(other.0.as_ref())
     }
 }
 
-impl<T: Mask, const LANES: usize> Eq for BitMask<T, LANES> {}
+impl<const LANES: usize> Eq for BitMask<LANES> where LaneCount<LANES>: SupportedLaneCount {}
 
-impl<T: Mask, const LANES: usize> Ord for BitMask<T, LANES> {
+impl<const LANES: usize> Ord for BitMask<LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+{
     fn cmp(&self, other: &Self) -> core::cmp::Ordering {
         self.0.as_ref().cmp(other.0.as_ref())
     }
 }
 
-impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
+impl<const LANES: usize> BitMask<LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+{
     #[inline]
     pub fn splat(value: bool) -> Self {
-        let mut mask = T::BitMask::default();
+        let mut mask = <LaneCount<LANES> as SupportedLaneCount>::BitMask::default();
         if value {
             mask.as_mut().fill(u8::MAX)
         } else {
@@ -53,12 +84,12 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
         if LANES % 8 > 0 {
             *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
         }
-        Self(mask, PhantomData)
+        Self(mask)
     }
 
     #[inline]
     pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
-        (self.0.as_ref()[lane / 8] >> lane % 8) & 0x1 > 0
+        (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0
     }
 
     #[inline]
@@ -72,7 +103,8 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
         V: ConvertToInt + Default + core::ops::Not<Output = V>,
     {
         unsafe {
-            let mask: T::IntBitMask = core::mem::transmute_copy(&self);
+            let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
+                core::mem::transmute_copy(&self);
             crate::intrinsics::simd_select_bitmask(mask, !V::default(), V::default())
         }
     }
@@ -80,33 +112,29 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
     #[inline]
     pub unsafe fn from_int_unchecked<V>(value: V) -> Self
     where
-        V: crate::LanesAtMost32,
+        V: crate::Vector,
     {
         // TODO remove the transmute when rustc is more flexible
         assert_eq!(
-            core::mem::size_of::<T::IntBitMask>(),
-            core::mem::size_of::<T::BitMask>()
+            core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(
+            ),
+            core::mem::size_of::<
+                <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask,
+            >(),
         );
-        let mask: T::IntBitMask = crate::intrinsics::simd_bitmask(value);
-        Self(core::mem::transmute_copy(&mask), PhantomData)
+        let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
+            crate::intrinsics::simd_bitmask(value);
+        Self(core::mem::transmute_copy(&mask))
     }
 
     #[inline]
-    pub fn to_bitmask<U: Mask>(self) -> U::BitMask {
-        assert_eq!(
-            core::mem::size_of::<T::BitMask>(),
-            core::mem::size_of::<U::BitMask>()
-        );
-        unsafe { core::mem::transmute_copy(&self.0) }
+    pub fn to_bitmask(self) -> <LaneCount<LANES> as SupportedLaneCount>::BitMask {
+        self.0
     }
 
     #[inline]
-    pub fn from_bitmask<U: Mask>(bitmask: U::BitMask) -> Self {
-        assert_eq!(
-            core::mem::size_of::<T::BitMask>(),
-            core::mem::size_of::<U::BitMask>()
-        );
-        unsafe { core::mem::transmute_copy(&bitmask) }
+    pub fn from_bitmask(bitmask: <LaneCount<LANES> as SupportedLaneCount>::BitMask) -> Self {
+        Self(bitmask)
     }
 
     #[inline]
@@ -120,9 +148,10 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
     }
 }
 
-impl<T: Mask, const LANES: usize> core::ops::BitAnd for BitMask<T, LANES>
+impl<const LANES: usize> core::ops::BitAnd for BitMask<LANES>
 where
-    T::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
+    LaneCount<LANES>: SupportedLaneCount,
+    <LaneCount<LANES> as SupportedLaneCount>::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
 {
     type Output = Self;
     #[inline]
@@ -134,9 +163,10 @@ where
     }
 }
 
-impl<T: Mask, const LANES: usize> core::ops::BitOr for BitMask<T, LANES>
+impl<const LANES: usize> core::ops::BitOr for BitMask<LANES>
 where
-    T::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
+    LaneCount<LANES>: SupportedLaneCount,
+    <LaneCount<LANES> as SupportedLaneCount>::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
 {
     type Output = Self;
     #[inline]
@@ -148,7 +178,10 @@ where
     }
 }
 
-impl<T: Mask, const LANES: usize> core::ops::BitXor for BitMask<T, LANES> {
+impl<const LANES: usize> core::ops::BitXor for BitMask<LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+{
     type Output = Self;
     #[inline]
     fn bitxor(mut self, rhs: Self) -> Self::Output {
@@ -159,7 +192,10 @@ impl<T: Mask, const LANES: usize> core::ops::BitXor for BitMask<T, LANES> {
     }
 }
 
-impl<T: Mask, const LANES: usize> core::ops::Not for BitMask<T, LANES> {
+impl<const LANES: usize> core::ops::Not for BitMask<LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+{
     type Output = Self;
     #[inline]
     fn not(mut self) -> Self::Output {
@@ -173,31 +209,8 @@ impl<T: Mask, const LANES: usize> core::ops::Not for BitMask<T, LANES> {
     }
 }
 
-pub type Mask8<T, const LANES: usize> = BitMask<T, LANES>;
-pub type Mask16<T, const LANES: usize> = BitMask<T, LANES>;
-pub type Mask32<T, const LANES: usize> = BitMask<T, LANES>;
-pub type Mask64<T, const LANES: usize> = BitMask<T, LANES>;
-pub type MaskSize<T, const LANES: usize> = BitMask<T, LANES>;
-
-macro_rules! impl_from {
-    { $from:ident ($from_inner:ident) => $($to:ident ($to_inner:ident)),* } => {
-        $(
-        impl<const LANES: usize> From<$from<crate::$from<LANES>, LANES>> for $to<crate::$to<LANES>, LANES>
-        where
-            crate::$from_inner<LANES>: crate::LanesAtMost32,
-            crate::$to_inner<LANES>: crate::LanesAtMost32,
-            crate::$from<LANES>: crate::Mask,
-            crate::$to<LANES>: crate::Mask,
-        {
-            fn from(value: $from<crate::$from<LANES>, LANES>) -> Self {
-                unsafe { core::mem::transmute_copy(&value) }
-            }
-        }
-        )*
-    }
-}
-impl_from! { Mask8 (SimdI8) => Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize) }
-impl_from! { Mask16 (SimdI16) => Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8) }
-impl_from! { Mask32 (SimdI32) => Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16) }
-impl_from! { Mask64 (SimdI64) => MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32) }
-impl_from! { MaskSize (SimdIsize) => Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64) }
+pub type Mask8<const LANES: usize> = BitMask<LANES>;
+pub type Mask16<const LANES: usize> = BitMask<LANES>;
+pub type Mask32<const LANES: usize> = BitMask<LANES>;
+pub type Mask64<const LANES: usize> = BitMask<LANES>;
+pub type MaskSize<const LANES: usize> = BitMask<LANES>;
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index 7d98333ef60..af36571134e 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -1,8 +1,5 @@
 //! Masks that take up full SIMD vector registers.
 
-use crate::Mask;
-use core::marker::PhantomData;
-
 macro_rules! define_mask {
     {
         $(#[$attr:meta])*
@@ -12,20 +9,20 @@ macro_rules! define_mask {
     } => {
         $(#[$attr])*
         #[repr(transparent)]
-        pub struct $name<T: Mask, const $lanes: usize>(crate::$type<$lanes2>, PhantomData<T>)
+        pub struct $name<const $lanes: usize>(crate::$type<$lanes>)
         where
-            crate::$type<LANES>: crate::LanesAtMost32;
+            crate::LaneCount<$lanes>: crate::SupportedLaneCount;
 
         impl_full_mask_reductions! { $name, $type }
 
-        impl<T: Mask, const LANES: usize> Copy for $name<T, LANES>
+        impl<const LANES: usize> Copy for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {}
 
-        impl<T: Mask, const LANES: usize> Clone for $name<T, LANES>
+        impl<const LANES: usize> Clone for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             fn clone(&self) -> Self {
@@ -33,41 +30,41 @@ macro_rules! define_mask {
             }
         }
 
-        impl<T: Mask, const LANES: usize> PartialEq for $name<T, LANES>
+        impl<const LANES: usize> PartialEq for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             fn eq(&self, other: &Self) -> bool {
                 self.0 == other.0
             }
         }
 
-        impl<T: Mask, const LANES: usize> PartialOrd for $name<T, LANES>
+        impl<const LANES: usize> PartialOrd for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
                 self.0.partial_cmp(&other.0)
             }
         }
 
-        impl<T: Mask, const LANES: usize> Eq for $name<T, LANES>
+        impl<const LANES: usize> Eq for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {}
 
-        impl<T: Mask, const LANES: usize> Ord for $name<T, LANES>
+        impl<const LANES: usize> Ord for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                 self.0.cmp(&other.0)
             }
         }
 
-        impl<T: Mask, const LANES: usize> $name<T, LANES>
+        impl<const LANES: usize> $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             pub fn splat(value: bool) -> Self {
                 Self(
@@ -78,7 +75,6 @@ macro_rules! define_mask {
                             0
                         }
                     ),
-                    PhantomData,
                 )
             }
 
@@ -103,16 +99,19 @@ macro_rules! define_mask {
 
             #[inline]
             pub unsafe fn from_int_unchecked(value: crate::$type<LANES>) -> Self {
-                Self(value, PhantomData)
+                Self(value)
             }
 
             #[inline]
-            pub fn to_bitmask<U: crate::Mask>(self) -> U::BitMask {
+            pub fn to_bitmask(self) -> <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask {
                 unsafe {
-                    // TODO remove the transmute when rustc is more flexible
-                    assert_eq!(core::mem::size_of::<U::IntBitMask>(), core::mem::size_of::<U::BitMask>());
-                    let mask: U::IntBitMask = crate::intrinsics::simd_bitmask(self.0);
-                    let mut bitmask: U::BitMask = core::mem::transmute_copy(&mask);
+                    // TODO remove the transmute when rustc can use arrays of u8 as bitmasks
+                    assert_eq!(
+                        core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(),
+                        core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask>(),
+                    );
+                    let bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask = crate::intrinsics::simd_bitmask(self.0);
+                    let mut bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask = core::mem::transmute_copy(&bitmask);
 
                     // There is a bug where LLVM appears to implement this operation with the wrong
                     // bit order.
@@ -128,7 +127,7 @@ macro_rules! define_mask {
             }
 
             #[inline]
-            pub fn from_bitmask<U: crate::Mask>(mut bitmask: U::BitMask) -> Self {
+            pub fn from_bitmask(mut bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask) -> Self {
                 unsafe {
                     // There is a bug where LLVM appears to implement this operation with the wrong
                     // bit order.
@@ -139,9 +138,12 @@ macro_rules! define_mask {
                         }
                     }
 
-                    // TODO remove the transmute when rustc is more flexible
-                    assert_eq!(core::mem::size_of::<U::IntBitMask>(), core::mem::size_of::<U::BitMask>());
-                    let bitmask: U::IntBitMask = core::mem::transmute_copy(&bitmask);
+                    // TODO remove the transmute when rustc can use arrays of u8 as bitmasks
+                    assert_eq!(
+                        core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(),
+                        core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask>(),
+                    );
+                    let bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask = core::mem::transmute_copy(&bitmask);
 
                     Self::from_int_unchecked(crate::intrinsics::simd_select_bitmask(
                         bitmask,
@@ -152,56 +154,56 @@ macro_rules! define_mask {
             }
         }
 
-        impl<T: Mask, const LANES: usize> core::convert::From<$name<T, LANES>> for crate::$type<LANES>
+        impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::$type<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
-            fn from(value: $name<T, LANES>) -> Self {
+            fn from(value: $name<LANES>) -> Self {
                 value.0
             }
         }
 
-        impl<T: Mask, const LANES: usize> core::ops::BitAnd for $name<T, LANES>
+        impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
             fn bitand(self, rhs: Self) -> Self {
-                Self(self.0 & rhs.0, PhantomData)
+                Self(self.0 & rhs.0)
             }
         }
 
-        impl<T: Mask, const LANES: usize> core::ops::BitOr for $name<T, LANES>
+        impl<const LANES: usize> core::ops::BitOr for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
             fn bitor(self, rhs: Self) -> Self {
-                Self(self.0 | rhs.0, PhantomData)
+                Self(self.0 | rhs.0)
             }
         }
 
-        impl<T: Mask, const LANES: usize> core::ops::BitXor for $name<T, LANES>
+        impl<const LANES: usize> core::ops::BitXor for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
             fn bitxor(self, rhs: Self) -> Self::Output {
-                Self(self.0 ^ rhs.0, PhantomData)
+                Self(self.0 ^ rhs.0)
             }
         }
 
-        impl<T: Mask, const LANES: usize> core::ops::Not for $name<T, LANES>
+        impl<const LANES: usize> core::ops::Not for $name<LANES>
         where
-            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             type Output = Self;
             #[inline]
             fn not(self) -> Self::Output {
-                Self(!self.0, PhantomData)
+                Self(!self.0)
             }
         }
     }
@@ -240,14 +242,11 @@ define_mask! {
 macro_rules! impl_from {
     { $from:ident ($from_inner:ident) => $($to:ident ($to_inner:ident)),* } => {
         $(
-        impl<const LANES: usize, T, U> From<$from<T, LANES>> for $to<U, LANES>
+        impl<const LANES: usize> From<$from<LANES>> for $to<LANES>
         where
-            crate::$from_inner<LANES>: crate::LanesAtMost32,
-            crate::$to_inner<LANES>: crate::LanesAtMost32,
-            T: crate::Mask,
-            U: crate::Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
-            fn from(value: $from<T, LANES>) -> Self {
+            fn from(value: $from<LANES>) -> Self {
                 let mut new = Self::splat(false);
                 for i in 0..LANES {
                     unsafe { new.set_unchecked(i, value.test_unchecked(i)) }
diff --git a/crates/core_simd/src/math.rs b/crates/core_simd/src/math.rs
index 7290a28362f..28720eb13e3 100644
--- a/crates/core_simd/src/math.rs
+++ b/crates/core_simd/src/math.rs
@@ -1,6 +1,6 @@
 macro_rules! impl_uint_arith {
     ($(($name:ident, $n:ident)),+) => {
-        $( impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
+        $( impl<const LANES: usize> $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
 
             /// Lanewise saturating add.
             ///
@@ -44,7 +44,7 @@ macro_rules! impl_uint_arith {
 
 macro_rules! impl_int_arith {
     ($(($name:ident, $n:ident)),+) => {
-        $( impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
+        $( impl<const LANES: usize> $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
 
             /// Lanewise saturating add.
             ///
diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs
index c7037d2acbc..c75090aab9c 100644
--- a/crates/core_simd/src/ops.rs
+++ b/crates/core_simd/src/ops.rs
@@ -1,4 +1,4 @@
-use crate::LanesAtMost32;
+use crate::{LaneCount, SupportedLaneCount};
 
 /// Checks if the right-hand side argument of a left- or right-shift would cause overflow.
 fn invalid_shift_rhs<T>(rhs: T) -> bool
@@ -16,7 +16,7 @@ macro_rules! impl_ref_ops {
     {
         impl<const $lanes:ident: usize> core::ops::$trait:ident<$rhs:ty> for $type:ty
         where
-            $($bound:path: LanesAtMost32,)*
+            LaneCount<$lanes2:ident>: SupportedLaneCount,
         {
             type Output = $output:ty;
 
@@ -26,7 +26,7 @@ macro_rules! impl_ref_ops {
     } => {
         impl<const $lanes: usize> core::ops::$trait<$rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = $output;
 
@@ -36,7 +36,7 @@ macro_rules! impl_ref_ops {
 
         impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait<$rhs>>::Output;
 
@@ -48,7 +48,7 @@ macro_rules! impl_ref_ops {
 
         impl<const $lanes: usize> core::ops::$trait<$rhs> for &'_ $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait<$rhs>>::Output;
 
@@ -60,7 +60,7 @@ macro_rules! impl_ref_ops {
 
         impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for &'_ $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait<$rhs>>::Output;
 
@@ -75,7 +75,7 @@ macro_rules! impl_ref_ops {
     {
         impl<const $lanes:ident: usize> core::ops::$trait:ident<$rhs:ty> for $type:ty
         where
-            $($bound:path: LanesAtMost32,)*
+            LaneCount<$lanes2:ident>: SupportedLaneCount,
         {
             $(#[$attrs:meta])*
             fn $fn:ident(&mut $self_tok:ident, $rhs_arg:ident: $rhs_arg_ty:ty) $body:tt
@@ -83,7 +83,7 @@ macro_rules! impl_ref_ops {
     } => {
         impl<const $lanes: usize> core::ops::$trait<$rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             $(#[$attrs])*
             fn $fn(&mut $self_tok, $rhs_arg: $rhs_arg_ty) $body
@@ -91,7 +91,7 @@ macro_rules! impl_ref_ops {
 
         impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             $(#[$attrs])*
             fn $fn(&mut $self_tok, $rhs_arg: &$rhs_arg_ty) {
@@ -104,7 +104,7 @@ macro_rules! impl_ref_ops {
     {
         impl<const $lanes:ident: usize> core::ops::$trait:ident for $type:ty
         where
-            $($bound:path: LanesAtMost32,)*
+            LaneCount<$lanes2:ident>: SupportedLaneCount,
         {
             type Output = $output:ty;
             fn $fn:ident($self_tok:ident) -> Self::Output $body:tt
@@ -112,7 +112,7 @@ macro_rules! impl_ref_ops {
     } => {
         impl<const $lanes: usize> core::ops::$trait for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = $output;
             fn $fn($self_tok) -> Self::Output $body
@@ -120,7 +120,7 @@ macro_rules! impl_ref_ops {
 
         impl<const $lanes: usize> core::ops::$trait for &'_ $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait>::Output;
             fn $fn($self_tok) -> Self::Output {
@@ -167,7 +167,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Not for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
                 fn not(self) -> Self::Output {
@@ -181,7 +181,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Neg for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
                 fn neg(self) -> Self::Output {
@@ -194,7 +194,7 @@ macro_rules! impl_op {
     { impl Index for $type:ident, $scalar:ty } => {
         impl<I, const LANES: usize> core::ops::Index<I> for crate::$type<LANES>
         where
-            Self: LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
             I: core::slice::SliceIndex<[$scalar]>,
         {
             type Output = I::Output;
@@ -206,7 +206,7 @@ macro_rules! impl_op {
 
         impl<I, const LANES: usize> core::ops::IndexMut<I> for crate::$type<LANES>
         where
-            Self: LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
             I: core::slice::SliceIndex<[$scalar]>,
         {
             fn index_mut(&mut self, index: I) -> &mut Self::Output {
@@ -221,7 +221,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$trait<Self> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
 
@@ -237,7 +237,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$trait<$scalar> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
 
@@ -251,7 +251,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$trait<crate::$type<LANES>> for $scalar
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = crate::$type<LANES>;
 
@@ -265,7 +265,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$assign_trait<Self> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn $assign_trait_fn(&mut self, rhs: Self) {
@@ -279,7 +279,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$assign_trait<$scalar> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn $assign_trait_fn(&mut self, rhs: $scalar) {
@@ -325,13 +325,13 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Div<Self> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = Self;
 
                         #[inline]
                         fn div(self, rhs: Self) -> Self::Output {
-                            if rhs.as_slice()
+                            if rhs.as_array()
                                 .iter()
                                 .any(|x| *x == 0)
                             {
@@ -340,8 +340,8 @@ macro_rules! impl_unsigned_int_ops {
 
                             // Guards for div(MIN, -1),
                             // this check only applies to signed ints
-                            if <$scalar>::MIN != 0 && self.as_slice().iter()
-                                    .zip(rhs.as_slice().iter())
+                            if <$scalar>::MIN != 0 && self.as_array().iter()
+                                    .zip(rhs.as_array().iter())
                                     .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
                                 panic!("attempt to divide with overflow");
                             }
@@ -353,7 +353,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Div<$scalar> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = Self;
 
@@ -363,7 +363,7 @@ macro_rules! impl_unsigned_int_ops {
                                 panic!("attempt to divide by zero");
                             }
                             if <$scalar>::MIN != 0 &&
-                                self.as_slice().iter().any(|x| *x == <$scalar>::MIN) &&
+                                self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
                                 rhs == -1 as _ {
                                     panic!("attempt to divide with overflow");
                             }
@@ -376,7 +376,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Div<crate::$vector<LANES>> for $scalar
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = crate::$vector<LANES>;
 
@@ -390,7 +390,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::DivAssign<Self> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         #[inline]
                         fn div_assign(&mut self, rhs: Self) {
@@ -402,7 +402,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::DivAssign<$scalar> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         #[inline]
                         fn div_assign(&mut self, rhs: $scalar) {
@@ -415,13 +415,13 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Rem<Self> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = Self;
 
                         #[inline]
                         fn rem(self, rhs: Self) -> Self::Output {
-                            if rhs.as_slice()
+                            if rhs.as_array()
                                 .iter()
                                 .any(|x| *x == 0)
                             {
@@ -430,8 +430,8 @@ macro_rules! impl_unsigned_int_ops {
 
                             // Guards for rem(MIN, -1)
                             // this branch applies the check only to signed ints
-                            if <$scalar>::MIN != 0 && self.as_slice().iter()
-                                    .zip(rhs.as_slice().iter())
+                            if <$scalar>::MIN != 0 && self.as_array().iter()
+                                    .zip(rhs.as_array().iter())
                                     .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
                                 panic!("attempt to calculate the remainder with overflow");
                             }
@@ -443,7 +443,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Rem<$scalar> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = Self;
 
@@ -453,7 +453,7 @@ macro_rules! impl_unsigned_int_ops {
                                 panic!("attempt to calculate the remainder with a divisor of zero");
                             }
                             if <$scalar>::MIN != 0 &&
-                                self.as_slice().iter().any(|x| *x == <$scalar>::MIN) &&
+                                self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
                                 rhs == -1 as _ {
                                     panic!("attempt to calculate the remainder with overflow");
                             }
@@ -466,7 +466,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Rem<crate::$vector<LANES>> for $scalar
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = crate::$vector<LANES>;
 
@@ -480,7 +480,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::RemAssign<Self> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         #[inline]
                         fn rem_assign(&mut self, rhs: Self) {
@@ -492,7 +492,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::RemAssign<$scalar> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         #[inline]
                         fn rem_assign(&mut self, rhs: $scalar) {
@@ -505,14 +505,14 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Shl<Self> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = Self;
 
                         #[inline]
                         fn shl(self, rhs: Self) -> Self::Output {
                             // TODO there is probably a better way of doing this
-                            if rhs.as_slice()
+                            if rhs.as_array()
                                 .iter()
                                 .copied()
                                 .any(invalid_shift_rhs)
@@ -527,7 +527,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Shl<$scalar> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = Self;
 
@@ -546,7 +546,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::ShlAssign<Self> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         #[inline]
                         fn shl_assign(&mut self, rhs: Self) {
@@ -558,7 +558,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::ShlAssign<$scalar> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         #[inline]
                         fn shl_assign(&mut self, rhs: $scalar) {
@@ -570,14 +570,14 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Shr<Self> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = Self;
 
                         #[inline]
                         fn shr(self, rhs: Self) -> Self::Output {
                             // TODO there is probably a better way of doing this
-                            if rhs.as_slice()
+                            if rhs.as_array()
                                 .iter()
                                 .copied()
                                 .any(invalid_shift_rhs)
@@ -592,7 +592,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::Shr<$scalar> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         type Output = Self;
 
@@ -611,7 +611,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::ShrAssign<Self> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         #[inline]
                         fn shr_assign(&mut self, rhs: Self) {
@@ -623,7 +623,7 @@ macro_rules! impl_unsigned_int_ops {
                 impl_ref_ops! {
                     impl<const LANES: usize> core::ops::ShrAssign<$scalar> for crate::$vector<LANES>
                     where
-                        crate::$vector<LANES>: LanesAtMost32,
+                        LaneCount<LANES>: SupportedLaneCount,
                     {
                         #[inline]
                         fn shr_assign(&mut self, rhs: $scalar) {
diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index 8687d1af516..df227d09e34 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -2,7 +2,7 @@ macro_rules! impl_integer_reductions {
     { $name:ident, $scalar:ty } => {
         impl<const LANES: usize> crate::$name<LANES>
         where
-            Self: crate::LanesAtMost32
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Horizontal wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
             #[inline]
@@ -56,7 +56,7 @@ macro_rules! impl_float_reductions {
     { $name:ident, $scalar:ty } => {
         impl<const LANES: usize> crate::$name<LANES>
         where
-            Self: crate::LanesAtMost32
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
 
             /// Horizontal add.  Returns the sum of the lanes of the vector.
@@ -64,7 +64,7 @@ macro_rules! impl_float_reductions {
             pub fn horizontal_sum(self) -> $scalar {
                 // LLVM sum is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
-                    self.as_slice().iter().sum()
+                    self.as_array().iter().sum()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
                 }
@@ -75,7 +75,7 @@ macro_rules! impl_float_reductions {
             pub fn horizontal_product(self) -> $scalar {
                 // LLVM product is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
-                    self.as_slice().iter().product()
+                    self.as_array().iter().product()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
                 }
@@ -104,9 +104,9 @@ macro_rules! impl_float_reductions {
 
 macro_rules! impl_full_mask_reductions {
     { $name:ident, $bits_ty:ident } => {
-        impl<T: crate::Mask, const LANES: usize> $name<T, LANES>
+        impl<const LANES: usize> $name<LANES>
         where
-            crate::$bits_ty<LANES>: crate::LanesAtMost32
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             pub fn any(self) -> bool {
@@ -125,8 +125,7 @@ macro_rules! impl_opaque_mask_reductions {
     { $name:ident, $bits_ty:ident } => {
         impl<const LANES: usize> $name<LANES>
         where
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
-            $name<LANES>: crate::Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Returns true if any lane is set, or false otherwise.
             #[inline]
diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs
index 281851c68ac..74cae0cf989 100644
--- a/crates/core_simd/src/round.rs
+++ b/crates/core_simd/src/round.rs
@@ -5,7 +5,7 @@ macro_rules! implement {
         #[cfg(feature = "std")]
         impl<const LANES: usize> crate::$type<LANES>
         where
-            Self: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Returns the smallest integer greater than or equal to each lane.
             #[must_use = "method returns a new vector and does not mutate the original value"]
@@ -41,13 +41,7 @@ macro_rules! implement {
             pub fn fract(self) -> Self {
                 self - self.trunc()
             }
-        }
 
-        impl<const LANES: usize> crate::$type<LANES>
-        where
-            Self: crate::LanesAtMost32,
-            crate::$int_type<LANES>: crate::LanesAtMost32,
-        {
             /// Rounds toward zero and converts to the same-width integer type, assuming that
             /// the value is finite and fits in that type.
             ///
diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs
index dee1d775eb8..d70e8a66b95 100644
--- a/crates/core_simd/src/select.rs
+++ b/crates/core_simd/src/select.rs
@@ -14,12 +14,10 @@ macro_rules! impl_select {
         $mask:ident ($bits_ty:ident): $($type:ident),*
     } => {
         $(
-        impl<const LANES: usize> Sealed for crate::$type<LANES> where Self: crate::LanesAtMost32 {}
+        impl<const LANES: usize> Sealed for crate::$type<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {}
         impl<const LANES: usize> Select<crate::$mask<LANES>> for crate::$type<LANES>
         where
-            crate::$mask<LANES>: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
-            Self: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[doc(hidden)]
             #[inline]
@@ -31,13 +29,12 @@ macro_rules! impl_select {
 
         impl<const LANES: usize> Sealed for crate::$mask<LANES>
         where
-            Self: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {}
+
         impl<const LANES: usize> Select<Self> for crate::$mask<LANES>
         where
-            Self: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[doc(hidden)]
             #[inline]
@@ -48,8 +45,7 @@ macro_rules! impl_select {
 
         impl<const LANES: usize> crate::$mask<LANES>
         where
-            Self: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Choose lanes from two vectors.
             ///
diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs
index a2d9cc4ef56..0823391049f 100644
--- a/crates/core_simd/src/to_bytes.rs
+++ b/crates/core_simd/src/to_bytes.rs
@@ -18,11 +18,14 @@ pub trait ToBytes: Sealed {
 macro_rules! impl_to_bytes {
     { $name:ident, $($int_width:literal -> $byte_width:literal),* } => {
         $(
-        impl Sealed for crate::$name<$int_width> where Self: crate::LanesAtMost32 {}
+        impl Sealed for crate::$name<$int_width>
+        where
+            crate::LaneCount<$int_width>: crate::SupportedLaneCount,
+        {}
+
         impl ToBytes for crate::$name<$int_width>
         where
-            Self: crate::LanesAtMost32,
-            crate::SimdU8<$byte_width>: crate::LanesAtMost32,
+            crate::LaneCount<$int_width>: crate::SupportedLaneCount,
         {
             type Bytes = crate::SimdU8<$byte_width>;
             fn to_bytes_impl(self) -> Self::Bytes {
@@ -36,7 +39,8 @@ macro_rules! impl_to_bytes {
 
         impl<const LANES: usize> crate::$name<LANES>
         where
-            Self: ToBytes + crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+            Self: ToBytes,
         {
             /// Return the memory representation of this integer as a byte array in native byte
             /// order.
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 95214ea8864..1f6df533767 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -1,3 +1,6 @@
+#[macro_use]
+mod vector_impl;
+
 mod float;
 mod int;
 mod uint;
@@ -8,3 +11,22 @@ pub use uint::*;
 
 // Vectors of pointers are not for public use at the current time.
 pub(crate) mod ptr;
+
+mod sealed {
+    pub trait Sealed {}
+}
+
+/// A representation of a vector as an "array" with indices, implementing
+/// operations applicable to any vector type based solely on "having lanes",
+/// and describing relationships between vector and scalar types.
+pub trait Vector: sealed::Sealed {
+    /// The scalar type in every lane of this vector type.
+    type Scalar: Copy + Sized;
+
+    /// The number of lanes for this vector.
+    const LANES: usize;
+
+    /// Generates a SIMD vector with the same value in every lane.
+    #[must_use]
+    fn splat(val: Self::Scalar) -> Self;
+}
diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs
index b6e9b61f71f..bdeccd037a8 100644
--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@@ -1,5 +1,7 @@
 #![allow(non_camel_case_types)]
 
+use crate::{LaneCount, SupportedLaneCount};
+
 /// Implements inherent methods for a float vector `$name` containing multiple
 /// `$lanes` of float `$type`, which uses `$bits_ty` as its binary
 /// representation. Called from `define_float_vector!`.
@@ -10,8 +12,7 @@ macro_rules! impl_float_vector {
 
         impl<const LANES: usize> $name<LANES>
         where
-            Self: crate::LanesAtMost32,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             /// Raw transmutation to an unsigned integer vector type with the
             /// same size and number of lanes.
@@ -74,15 +75,7 @@ macro_rules! impl_float_vector {
             pub fn to_radians(self) -> Self {
                 self * Self::splat($type::to_radians(1.))
             }
-        }
 
-        impl<const LANES: usize> $name<LANES>
-        where
-            Self: crate::LanesAtMost32,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
-            crate::$mask_impl_ty<LANES>: crate::LanesAtMost32,
-            crate::$mask_ty<LANES>: crate::Mask,
-        {
             /// Returns true for each lane if it has a positive sign, including
             /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
             #[inline]
@@ -197,7 +190,7 @@ macro_rules! impl_float_vector {
 #[repr(simd)]
 pub struct SimdF32<const LANES: usize>([f32; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_float_vector! { SimdF32, f32, SimdU32, Mask32, SimdI32 }
 
@@ -205,7 +198,7 @@ impl_float_vector! { SimdF32, f32, SimdU32, Mask32, SimdI32 }
 #[repr(simd)]
 pub struct SimdF64<const LANES: usize>([f64; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_float_vector! { SimdF64, f64, SimdU64, Mask64, SimdI64 }
 
diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs
index 15ad1a7193a..73c737762fb 100644
--- a/crates/core_simd/src/vector/int.rs
+++ b/crates/core_simd/src/vector/int.rs
@@ -1,36 +1,39 @@
 #![allow(non_camel_case_types)]
 
+use crate::{LaneCount, SupportedLaneCount};
+
 /// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
 macro_rules! impl_integer_vector {
     { $name:ident, $type:ty, $mask_ty:ident, $mask_impl_ty:ident } => {
         impl_vector! { $name, $type }
         impl_integer_reductions! { $name, $type }
 
-        impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
+        impl<const LANES: usize> Eq for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {}
 
-        impl<const LANES: usize> Ord for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> Ord for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {
             #[inline]
             fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                 // TODO use SIMD cmp
-                self.to_array().cmp(other.as_ref())
+                self.as_array().cmp(other.as_ref())
             }
         }
 
-        impl<const LANES: usize> core::hash::Hash for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> core::hash::Hash for $name<LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
             #[inline]
             fn hash<H>(&self, state: &mut H)
             where
                 H: core::hash::Hasher
             {
-                self.as_slice().hash(state)
+                self.as_array().hash(state)
             }
         }
 
         impl<const LANES: usize> $name<LANES>
         where
-            Self: crate::LanesAtMost32,
-            crate::$mask_impl_ty<LANES>: crate::LanesAtMost32,
-            crate::$mask_ty<LANES>: crate::Mask,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             /// Returns true for each positive lane and false if it is zero or negative.
             #[inline]
@@ -63,7 +66,7 @@ macro_rules! impl_integer_vector {
 #[repr(simd)]
 pub struct SimdIsize<const LANES: usize>([isize; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_integer_vector! { SimdIsize, isize, MaskSize, SimdIsize }
 
@@ -71,7 +74,7 @@ impl_integer_vector! { SimdIsize, isize, MaskSize, SimdIsize }
 #[repr(simd)]
 pub struct SimdI16<const LANES: usize>([i16; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_integer_vector! { SimdI16, i16, Mask16, SimdI16 }
 
@@ -79,7 +82,7 @@ impl_integer_vector! { SimdI16, i16, Mask16, SimdI16 }
 #[repr(simd)]
 pub struct SimdI32<const LANES: usize>([i32; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_integer_vector! { SimdI32, i32, Mask32, SimdI32 }
 
@@ -87,7 +90,7 @@ impl_integer_vector! { SimdI32, i32, Mask32, SimdI32 }
 #[repr(simd)]
 pub struct SimdI64<const LANES: usize>([i64; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_integer_vector! { SimdI64, i64, Mask64, SimdI64 }
 
@@ -95,7 +98,7 @@ impl_integer_vector! { SimdI64, i64, Mask64, SimdI64 }
 #[repr(simd)]
 pub struct SimdI8<const LANES: usize>([i8; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_integer_vector! { SimdI8, i8, Mask8, SimdI8 }
 
diff --git a/crates/core_simd/src/vector/ptr.rs b/crates/core_simd/src/vector/ptr.rs
index 30bef038b33..9dd1bfd0f36 100644
--- a/crates/core_simd/src/vector/ptr.rs
+++ b/crates/core_simd/src/vector/ptr.rs
@@ -1,5 +1,5 @@
 //! Private implementation details of public gather/scatter APIs.
-use crate::SimdUsize;
+use crate::{LaneCount, SimdUsize, SupportedLaneCount};
 use core::mem;
 
 /// A vector of *const T.
@@ -9,7 +9,7 @@ pub(crate) struct SimdConstPtr<T, const LANES: usize>([*const T; LANES]);
 
 impl<T, const LANES: usize> SimdConstPtr<T, LANES>
 where
-    SimdUsize<LANES>: crate::LanesAtMost32,
+    LaneCount<LANES>: SupportedLaneCount,
     T: Sized,
 {
     #[inline]
@@ -35,7 +35,7 @@ pub(crate) struct SimdMutPtr<T, const LANES: usize>([*mut T; LANES]);
 
 impl<T, const LANES: usize> SimdMutPtr<T, LANES>
 where
-    SimdUsize<LANES>: crate::LanesAtMost32,
+    LaneCount<LANES>: SupportedLaneCount,
     T: Sized,
 {
     #[inline]
diff --git a/crates/core_simd/src/vector/uint.rs b/crates/core_simd/src/vector/uint.rs
index 0429410ed6d..b19f694872a 100644
--- a/crates/core_simd/src/vector/uint.rs
+++ b/crates/core_simd/src/vector/uint.rs
@@ -1,28 +1,33 @@
 #![allow(non_camel_case_types)]
 
+use crate::{LaneCount, SupportedLaneCount};
+
 /// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
 macro_rules! impl_unsigned_vector {
     { $name:ident, $type:ty } => {
         impl_vector! { $name, $type }
         impl_integer_reductions! { $name, $type }
 
-        impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
+        impl<const LANES: usize> Eq for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {}
 
-        impl<const LANES: usize> Ord for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> Ord for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {
             #[inline]
             fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                 // TODO use SIMD cmp
-                self.to_array().cmp(other.as_ref())
+                self.as_array().cmp(other.as_ref())
             }
         }
 
-        impl<const LANES: usize> core::hash::Hash for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> core::hash::Hash for $name<LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
             #[inline]
             fn hash<H>(&self, state: &mut H)
             where
                 H: core::hash::Hasher
             {
-                self.as_slice().hash(state)
+                self.as_array().hash(state)
             }
         }
     }
@@ -32,7 +37,7 @@ macro_rules! impl_unsigned_vector {
 #[repr(simd)]
 pub struct SimdUsize<const LANES: usize>([usize; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_unsigned_vector! { SimdUsize, usize }
 
@@ -40,7 +45,7 @@ impl_unsigned_vector! { SimdUsize, usize }
 #[repr(simd)]
 pub struct SimdU16<const LANES: usize>([u16; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_unsigned_vector! { SimdU16, u16 }
 
@@ -48,7 +53,7 @@ impl_unsigned_vector! { SimdU16, u16 }
 #[repr(simd)]
 pub struct SimdU32<const LANES: usize>([u32; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_unsigned_vector! { SimdU32, u32 }
 
@@ -56,7 +61,7 @@ impl_unsigned_vector! { SimdU32, u32 }
 #[repr(simd)]
 pub struct SimdU64<const LANES: usize>([u64; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_unsigned_vector! { SimdU64, u64 }
 
@@ -64,7 +69,7 @@ impl_unsigned_vector! { SimdU64, u64 }
 #[repr(simd)]
 pub struct SimdU8<const LANES: usize>([u8; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;
 
 impl_unsigned_vector! { SimdU8, u8 }
 
diff --git a/crates/core_simd/src/vector/vector_impl.rs b/crates/core_simd/src/vector/vector_impl.rs
new file mode 100644
index 00000000000..58ea244adfc
--- /dev/null
+++ b/crates/core_simd/src/vector/vector_impl.rs
@@ -0,0 +1,257 @@
+/// Implements common traits on the specified vector `$name`, holding `LANES` lanes of `$type`.
+macro_rules! impl_vector {
+    { $name:ident, $type:ty } => {
+        impl<const LANES: usize> crate::vector::sealed::Sealed for $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {}
+
+        impl<const LANES: usize> crate::vector::Vector for $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {
+            type Scalar = $type;
+            const LANES: usize = LANES;
+
+            #[inline]
+            fn splat(val: Self::Scalar) -> Self {
+                Self::splat(val)
+            }
+        }
+
+        impl<const LANES: usize> $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {
+            /// Construct a SIMD vector by setting all lanes to the given value.
+            pub const fn splat(value: $type) -> Self {
+                Self([value; LANES])
+            }
+
+            /// Returns an array reference containing the entire SIMD vector.
+            pub const fn as_array(&self) -> &[$type; LANES] {
+                &self.0
+            }
+
+            /// Returns a mutable array reference containing the entire SIMD vector.
+            pub fn as_mut_array(&mut self) -> &mut [$type; LANES] {
+                &mut self.0
+            }
+
+            /// Converts an array to a SIMD vector.
+            pub const fn from_array(array: [$type; LANES]) -> Self {
+                Self(array)
+            }
+
+            /// Converts a SIMD vector to an array.
+            pub const fn to_array(self) -> [$type; LANES] {
+                self.0
+            }
+
+            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+            /// If an index is out of bounds, that lane instead selects the value from the "or" vector.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+            /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
+            ///
+            /// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
+            /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15]));
+            /// ```
+            #[must_use]
+            #[inline]
+            pub fn gather_or(slice: &[$type], idxs: crate::SimdUsize<LANES>, or: Self) -> Self {
+                Self::gather_select(slice, crate::MaskSize::splat(true), idxs, or)
+            }
+
+            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+            /// Out-of-bounds indices instead use the default value for that lane (0).
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+            ///
+            /// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
+            /// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15]));
+            /// ```
+            #[must_use]
+            #[inline]
+            pub fn gather_or_default(slice: &[$type], idxs: crate::SimdUsize<LANES>) -> Self {
+                Self::gather_or(slice, idxs, Self::splat(<$type>::default()))
+            }
+
+            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+            /// Out-of-bounds or masked indices instead select the value from the "or" vector.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+            /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
+            /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
+            ///
+            /// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds.
+            /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2]));
+            /// ```
+            #[must_use]
+            #[inline]
+            pub fn gather_select(
+                slice: &[$type],
+                mask: crate::MaskSize<LANES>,
+                idxs: crate::SimdUsize<LANES>,
+                or: Self,
+            ) -> Self
+            {
+                let mask = (mask & idxs.lanes_lt(crate::SimdUsize::splat(slice.len()))).to_int();
+                let base_ptr = crate::vector::ptr::SimdConstPtr::splat(slice.as_ptr());
+                // Ferris forgive me, I have done pointer arithmetic here.
+                let ptrs = base_ptr.wrapping_add(idxs);
+                // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
+                unsafe { crate::intrinsics::simd_gather(or, ptrs, mask) }
+            }
+
+            /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
+            /// Out-of-bounds indices are not written.
+            /// `scatter` writes "in order", so if an index receives two writes, only the last is guaranteed.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
+            /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
+            ///
+            /// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
+            /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
+            /// ```
+            #[inline]
+            pub fn scatter(self, slice: &mut [$type], idxs: crate::SimdUsize<LANES>) {
+                self.scatter_select(slice, crate::MaskSize::splat(true), idxs)
+            }
+
+            /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
+            /// Out-of-bounds or masked indices are not written.
+            /// `scatter_select` writes "in order", so if an index receives two writes, only the last is guaranteed.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
+            /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
+            /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
+            ///
+            /// vals.scatter_select(&mut vec, mask, idxs); // index 0's second write is masked, thus omitted.
+            /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+            /// ```
+            #[inline]
+            pub fn scatter_select(
+                self,
+                slice: &mut [$type],
+                mask: crate::MaskSize<LANES>,
+                idxs: crate::SimdUsize<LANES>,
+            )
+            {
+                // We must construct our scatter mask before we derive a pointer!
+                let mask = (mask & idxs.lanes_lt(crate::SimdUsize::splat(slice.len()))).to_int();
+                // SAFETY: This block works with *mut T derived from &'a mut [T],
+                // which means it is delicate in Rust's borrowing model, circa 2021:
+                // &'a mut [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
+                // Even though this block is largely safe methods, it must be almost exactly this way
+                // to prevent invalidating the raw ptrs while they're live.
+                // Thus, entering this block requires every value it uses to already be ready:
+                // 0. idxs we want to write to, which are used to construct the mask.
+                // 1. mask, which depends on an initial &'a [T] and the idxs.
+                // 2. actual values to scatter (self).
+                // 3. &mut [T] which will become our base ptr.
+                unsafe {
+                    // Now Entering ☢️ *mut T Zone
+                    let base_ptr = crate::vector::ptr::SimdMutPtr::splat(slice.as_mut_ptr());
+                    // Ferris forgive me, I have done pointer arithmetic here.
+                    let ptrs = base_ptr.wrapping_add(idxs);
+                    // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
+                    crate::intrinsics::simd_scatter(self, ptrs, mask)
+                    // Cleared ☢️ *mut T Zone
+                }
+            }
+        }
+
+        impl<const LANES: usize> Copy for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {}
+
+        impl<const LANES: usize> Clone for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn clone(&self) -> Self {
+                *self
+            }
+        }
+
+        impl<const LANES: usize> Default for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn default() -> Self {
+                Self::splat(<$type>::default())
+            }
+        }
+
+        impl<const LANES: usize> PartialEq for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                // TODO use SIMD equality
+                self.to_array() == other.to_array()
+            }
+        }
+
+        impl<const LANES: usize> PartialOrd for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+                // TODO use SIMD equality
+                self.to_array().partial_cmp(other.as_ref())
+            }
+        }
+
+        // array references
+        impl<const LANES: usize> AsRef<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_ref(&self) -> &[$type; LANES] {
+                &self.0
+            }
+        }
+
+        impl<const LANES: usize> AsMut<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_mut(&mut self) -> &mut [$type; LANES] {
+                &mut self.0
+            }
+        }
+
+        // slice references
+        impl<const LANES: usize> AsRef<[$type]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_ref(&self) -> &[$type] {
+                &self.0
+            }
+        }
+
+        impl<const LANES: usize> AsMut<[$type]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_mut(&mut self) -> &mut [$type] {
+                &mut self.0
+            }
+        }
+
+        // vector/array conversion
+        impl<const LANES: usize> From<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            fn from(array: [$type; LANES]) -> Self {
+                Self(array)
+            }
+        }
+
+        impl <const LANES: usize> From<$name<LANES>> for [$type; LANES] where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            fn from(vector: $name<LANES>) -> Self {
+                vector.to_array()
+            }
+        }
+
+        impl_shuffle_2pow_lanes!{ $name }
+    }
+}
diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs
index 318a7b3005e..5c6478876f3 100644
--- a/crates/test_helpers/src/lib.rs
+++ b/crates/test_helpers/src/lib.rs
@@ -335,23 +335,7 @@ macro_rules! test_lanes {
 
                 fn implementation<const $lanes: usize>()
                 where
-                    core_simd::SimdU8<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdU16<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdU32<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdU64<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdUsize<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdI8<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdI16<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdI32<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdI64<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdIsize<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdF32<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdF64<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::Mask8<$lanes>: core_simd::Mask,
-                    core_simd::Mask16<$lanes>: core_simd::Mask,
-                    core_simd::Mask32<$lanes>: core_simd::Mask,
-                    core_simd::Mask64<$lanes>: core_simd::Mask,
-                    core_simd::MaskSize<$lanes>: core_simd::Mask,
+                    core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount,
                 $body
 
                 #[cfg(target_arch = "wasm32")]
@@ -409,23 +393,7 @@ macro_rules! test_lanes_panic {
 
                 fn implementation<const $lanes: usize>()
                 where
-                    core_simd::SimdU8<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdU16<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdU32<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdU64<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdUsize<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdI8<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdI16<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdI32<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdI64<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdIsize<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdF32<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::SimdF64<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::Mask8<$lanes>: core_simd::Mask,
-                    core_simd::Mask16<$lanes>: core_simd::Mask,
-                    core_simd::Mask32<$lanes>: core_simd::Mask,
-                    core_simd::Mask64<$lanes>: core_simd::Mask,
-                    core_simd::MaskSize<$lanes>: core_simd::Mask,
+                    core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount,
                 $body
 
                 #[test]
author	Jubilee <46493976+workingjubilee@users.noreply.github.com>	2021-07-24 16:01:57 -0700
committer	GitHub <noreply@github.com>	2021-07-24 16:01:57 -0700
commit	82e3405efe5ffd2bc214d32b581d5cfc1157eb8d (patch)
tree	4051bef1cbb11b4c9c53acc0ea26eefcbd6503b2
parent	732b7edfab46b33e3861172eb867b139a9425574 (diff)
parent	97c25dd7465f4db60c013d7688b809a7da5388a6 (diff)
download	rust-82e3405efe5ffd2bc214d32b581d5cfc1157eb8d.tar.gz rust-82e3405efe5ffd2bc214d32b581d5cfc1157eb8d.zip