about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jubilee <46493976+workingjubilee@users.noreply.github.com> 2021-05-01 00:02:33 -0700
committer GitHub <noreply@github.com> 2021-05-01 00:02:33 -0700
commit 9a063bc2ed892315c8317f08f263466af35e9279 (patch)
tree a98d1282160da4bb96bbe3dd31aa7c0c676f18ff
parent 5751179dc636d53b0f2368e81f548c1c04a7b4f2 (diff)
parent 589fce03131225f9167b6b90c6382f40ea22edb6 (diff)
download rust-9a063bc2ed892315c8317f08f263466af35e9279.tar.gz
rust-9a063bc2ed892315c8317f08f263466af35e9279.zip
Merge pull request #99 from rust-lang/feature/simplify-masks
Feature/simplify masks
-rw-r--r-- crates/core_simd/src/comparisons.rs 45
-rw-r--r-- crates/core_simd/src/intrinsics.rs 6
-rw-r--r-- crates/core_simd/src/lanes_at_most_32.rs 42
-rw-r--r-- crates/core_simd/src/masks/bitmask.rs 259
-rw-r--r-- crates/core_simd/src/masks/full_masks.rs 372
-rw-r--r-- crates/core_simd/src/masks/mod.rs 218
-rw-r--r-- crates/core_simd/src/reduction.rs 30
-rw-r--r-- crates/core_simd/src/vector/float.rs 1
-rw-r--r-- crates/core_simd/src/vector/int.rs 1
-rw-r--r-- crates/core_simd/src/vector/uint.rs 1
-rw-r--r-- crates/core_simd/tests/masks.rs 42
-rw-r--r-- crates/test_helpers/src/array.rs 5
-rw-r--r-- crates/test_helpers/src/lib.rs 12
13 files changed, 443 insertions, 591 deletions
diff --git a/crates/core_simd/src/comparisons.rs b/crates/core_simd/src/comparisons.rs
index 988ff857eab..e8d11406c09 100644
--- a/crates/core_simd/src/comparisons.rs
+++ b/crates/core_simd/src/comparisons.rs
@@ -1,19 +1,19 @@
 use crate::LanesAtMost32;
 
 macro_rules! implement_mask_ops {
-    { $($vector:ident => $mask:ident ($inner_mask_ty:ident, $inner_ty:ident),)* } => {
+    { $($vector:ident => $mask:ident ($inner_ty:ident),)* } => {
         $(
             impl<const LANES: usize> crate::$vector<LANES>
             where
                 crate::$vector<LANES>: LanesAtMost32,
                 crate::$inner_ty<LANES>: LanesAtMost32,
+                crate::$mask<LANES>: crate::Mask,
             {
                 /// Test if each lane is equal to the corresponding lane in `other`.
                 #[inline]
                 pub fn lanes_eq(self, other: Self) -> crate::$mask<LANES> {
                     unsafe {
-                        crate::$inner_mask_ty::from_int_unchecked(crate::intrinsics::simd_eq(self, other))
-                            .into()
+                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_eq(self, other))
                     }
                 }
 
@@ -21,8 +21,7 @@ macro_rules! implement_mask_ops {
                 #[inline]
                 pub fn lanes_ne(self, other: Self) -> crate::$mask<LANES> {
                     unsafe {
-                        crate::$inner_mask_ty::from_int_unchecked(crate::intrinsics::simd_ne(self, other))
-                            .into()
+                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_ne(self, other))
                     }
                 }
 
@@ -30,8 +29,7 @@ macro_rules! implement_mask_ops {
                 #[inline]
                 pub fn lanes_lt(self, other: Self) -> crate::$mask<LANES> {
                     unsafe {
-                        crate::$inner_mask_ty::from_int_unchecked(crate::intrinsics::simd_lt(self, other))
-                            .into()
+                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_lt(self, other))
                     }
                 }
 
@@ -39,8 +37,7 @@ macro_rules! implement_mask_ops {
                 #[inline]
                 pub fn lanes_gt(self, other: Self) -> crate::$mask<LANES> {
                     unsafe {
-                        crate::$inner_mask_ty::from_int_unchecked(crate::intrinsics::simd_gt(self, other))
-                            .into()
+                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_gt(self, other))
                     }
                 }
 
@@ -48,8 +45,7 @@ macro_rules! implement_mask_ops {
                 #[inline]
                 pub fn lanes_le(self, other: Self) -> crate::$mask<LANES> {
                     unsafe {
-                        crate::$inner_mask_ty::from_int_unchecked(crate::intrinsics::simd_le(self, other))
-                            .into()
+                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_le(self, other))
                     }
                 }
 
@@ -57,8 +53,7 @@ macro_rules! implement_mask_ops {
                 #[inline]
                 pub fn lanes_ge(self, other: Self) -> crate::$mask<LANES> {
                     unsafe {
-                        crate::$inner_mask_ty::from_int_unchecked(crate::intrinsics::simd_ge(self, other))
-                            .into()
+                        crate::$mask::from_int_unchecked(crate::intrinsics::simd_ge(self, other))
                     }
                 }
             }
@@ -67,18 +62,18 @@ macro_rules! implement_mask_ops {
 }
 
 implement_mask_ops! {
-    SimdI8 => Mask8 (SimdMask8, SimdI8),
-    SimdI16 => Mask16 (SimdMask16, SimdI16),
-    SimdI32 => Mask32 (SimdMask32, SimdI32),
-    SimdI64 => Mask64 (SimdMask64, SimdI64),
-    SimdIsize => MaskSize (SimdMaskSize, SimdIsize),
+    SimdI8 => Mask8 (SimdI8),
+    SimdI16 => Mask16 (SimdI16),
+    SimdI32 => Mask32 (SimdI32),
+    SimdI64 => Mask64 (SimdI64),
+    SimdIsize => MaskSize (SimdIsize),
 
-    SimdU8 => Mask8 (SimdMask8, SimdI8),
-    SimdU16 => Mask16 (SimdMask16, SimdI16),
-    SimdU32 => Mask32 (SimdMask32, SimdI32),
-    SimdU64 => Mask64 (SimdMask64, SimdI64),
-    SimdUsize => MaskSize (SimdMaskSize, SimdIsize),
+    SimdU8 => Mask8 (SimdI8),
+    SimdU16 => Mask16 (SimdI16),
+    SimdU32 => Mask32 (SimdI32),
+    SimdU64 => Mask64 (SimdI64),
+    SimdUsize => MaskSize (SimdIsize),
 
-    SimdF32 => Mask32 (SimdMask32, SimdI32),
-    SimdF64 => Mask64 (SimdMask64, SimdI64),
+    SimdF32 => Mask32 (SimdI32),
+    SimdF64 => Mask64 (SimdI64),
 }
diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index 665dc1a51d7..8cbb0cbccf7 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -76,6 +76,12 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
     pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
     pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
+
+    // truncate integer vector to bitmask
+    pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
+
+    // select
+    pub(crate) fn simd_select_bitmask<T, U>(m: T, a: U, b: U) -> U;
 }
 
 #[cfg(feature = "std")]
diff --git a/crates/core_simd/src/lanes_at_most_32.rs b/crates/core_simd/src/lanes_at_most_32.rs
index 1e2f7e952c6..2d84b1306ea 100644
--- a/crates/core_simd/src/lanes_at_most_32.rs
+++ b/crates/core_simd/src/lanes_at_most_32.rs
@@ -1,14 +1,38 @@
-/// Implemented for bitmask sizes that are supported by the implementation.
-pub trait LanesAtMost32 {}
+/// Implemented for vectors that are supported by the implementation.
+pub trait LanesAtMost32: sealed::Sealed {
+    #[doc(hidden)]
+    type BitMask: Into<u64>;
+}
+
+mod sealed {
+    pub trait Sealed {}
+}
 
 macro_rules! impl_for {
     { $name:ident } => {
-        impl LanesAtMost32 for $name<1> {}
-        impl LanesAtMost32 for $name<2> {}
-        impl LanesAtMost32 for $name<4> {}
-        impl LanesAtMost32 for $name<8> {}
-        impl LanesAtMost32 for $name<16> {}
-        impl LanesAtMost32 for $name<32> {}
+        impl<const LANES: usize> sealed::Sealed for $name<LANES>
+        where
+            $name<LANES>: LanesAtMost32,
+        {}
+
+        impl LanesAtMost32 for $name<1> {
+            type BitMask = u8;
+        }
+        impl LanesAtMost32 for $name<2> {
+            type BitMask = u8;
+        }
+        impl LanesAtMost32 for $name<4> {
+            type BitMask = u8;
+        }
+        impl LanesAtMost32 for $name<8> {
+            type BitMask = u8;
+        }
+        impl LanesAtMost32 for $name<16> {
+            type BitMask = u16;
+        }
+        impl LanesAtMost32 for $name<32> {
+            type BitMask = u32;
+        }
     }
 }
 
@@ -28,5 +52,3 @@ impl_for! { SimdIsize }
 
 impl_for! { SimdF32 }
 impl_for! { SimdF64 }
-
-impl_for! { BitMask }
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index b4d1b6d9557..6bcb08cf9db 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -1,212 +1,171 @@
-use crate::LanesAtMost32;
+use crate::Mask;
+use core::marker::PhantomData;
+
+/// Helper trait for limiting int conversion types
+pub trait ConvertToInt {}
+impl<const LANES: usize> ConvertToInt for crate::SimdI8<LANES> where Self: crate::LanesAtMost32 {}
+impl<const LANES: usize> ConvertToInt for crate::SimdI16<LANES> where Self: crate::LanesAtMost32 {}
+impl<const LANES: usize> ConvertToInt for crate::SimdI32<LANES> where Self: crate::LanesAtMost32 {}
+impl<const LANES: usize> ConvertToInt for crate::SimdI64<LANES> where Self: crate::LanesAtMost32 {}
+impl<const LANES: usize> ConvertToInt for crate::SimdIsize<LANES> where Self: crate::LanesAtMost32 {}
 
 /// A mask where each lane is represented by a single bit.
-#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Hash)]
 #[repr(transparent)]
-pub struct BitMask<const LANES: usize>(u64)
-where
-    BitMask<LANES>: LanesAtMost32;
+pub struct BitMask<T: Mask, const LANES: usize>(T::BitMask, PhantomData<[(); LANES]>);
 
-impl<const LANES: usize> BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
-    /// Construct a mask by setting all lanes to the given value.
-    pub fn splat(value: bool) -> Self {
-        if value {
-            Self(u64::MAX >> (64 - LANES))
-        } else {
-            Self(u64::MIN)
-        }
-    }
+impl<T: Mask, const LANES: usize> Copy for BitMask<T, LANES> {}
 
-    /// Tests the value of the specified lane.
-    ///
-    /// # Panics
-    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
-    #[inline]
-    pub fn test(&self, lane: usize) -> bool {
-        assert!(lane < LANES, "lane index out of range");
-        (self.0 >> lane) & 0x1 > 0
-    }
-
-    /// Sets the value of the specified lane.
-    ///
-    /// # Panics
-    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
-    #[inline]
-    pub fn set(&mut self, lane: usize, value: bool) {
-        assert!(lane < LANES, "lane index out of range");
-        self.0 ^= ((value ^ self.test(lane)) as u64) << lane
+impl<T: Mask, const LANES: usize> Clone for BitMask<T, LANES> {
+    fn clone(&self) -> Self {
+        *self
     }
 }
 
-impl<const LANES: usize> core::ops::BitAnd for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
-    type Output = Self;
-    #[inline]
-    fn bitand(self, rhs: Self) -> Self {
-        Self(self.0 & rhs.0)
+impl<T: Mask, const LANES: usize> PartialEq for BitMask<T, LANES> {
+    fn eq(&self, other: &Self) -> bool {
+        self.0.as_ref() == other.0.as_ref()
     }
 }
 
-impl<const LANES: usize> core::ops::BitAnd<bool> for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
-    type Output = Self;
-    #[inline]
-    fn bitand(self, rhs: bool) -> Self {
-        self & Self::splat(rhs)
+impl<T: Mask, const LANES: usize> PartialOrd for BitMask<T, LANES> {
+    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+        self.0.as_ref().partial_cmp(other.0.as_ref())
     }
 }
 
-impl<const LANES: usize> core::ops::BitAnd<BitMask<LANES>> for bool
-where
-    BitMask<LANES>: LanesAtMost32,
-{
-    type Output = BitMask<LANES>;
-    #[inline]
-    fn bitand(self, rhs: BitMask<LANES>) -> BitMask<LANES> {
-        BitMask::<LANES>::splat(self) & rhs
-    }
-}
+impl<T: Mask, const LANES: usize> Eq for BitMask<T, LANES> {}
 
-impl<const LANES: usize> core::ops::BitOr for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
-    type Output = Self;
-    #[inline]
-    fn bitor(self, rhs: Self) -> Self {
-        Self(self.0 | rhs.0)
+impl<T: Mask, const LANES: usize> Ord for BitMask<T, LANES> {
+    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+        self.0.as_ref().cmp(other.0.as_ref())
     }
 }
 
-impl<const LANES: usize> core::ops::BitOr<bool> for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
-    type Output = Self;
+impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
     #[inline]
-    fn bitor(self, rhs: bool) -> Self {
-        self | Self::splat(rhs)
+    pub fn splat(value: bool) -> Self {
+        let mut mask = T::BitMask::default();
+        if value {
+            mask.as_mut().fill(u8::MAX)
+        } else {
+            mask.as_mut().fill(u8::MIN)
+        }
+        if LANES % 8 > 0 {
+            *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+        }
+        Self(mask, PhantomData)
     }
-}
 
-impl<const LANES: usize> core::ops::BitOr<BitMask<LANES>> for bool
-where
-    BitMask<LANES>: LanesAtMost32,
-{
-    type Output = BitMask<LANES>;
     #[inline]
-    fn bitor(self, rhs: BitMask<LANES>) -> BitMask<LANES> {
-        BitMask::<LANES>::splat(self) | rhs
+    pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+        (self.0.as_ref()[lane / 8] >> lane % 8) & 0x1 > 0
     }
-}
 
-impl<const LANES: usize> core::ops::BitXor for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
-    type Output = Self;
     #[inline]
-    fn bitxor(self, rhs: Self) -> Self::Output {
-        Self(self.0 ^ rhs.0)
+    pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+        self.0.as_mut()[lane / 8] ^= ((value ^ self.test_unchecked(lane)) as u8) << (lane % 8)
     }
-}
 
-impl<const LANES: usize> core::ops::BitXor<bool> for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
-    type Output = Self;
     #[inline]
-    fn bitxor(self, rhs: bool) -> Self::Output {
-        self ^ Self::splat(rhs)
+    pub fn to_int<V>(self) -> V
+    where
+        V: ConvertToInt + Default + core::ops::Not<Output = V>,
+    {
+        unsafe {
+            let mask: T::IntBitMask = core::mem::transmute_copy(&self);
+            crate::intrinsics::simd_select_bitmask(mask, !V::default(), V::default())
+        }
     }
-}
 
-impl<const LANES: usize> core::ops::BitXor<BitMask<LANES>> for bool
-where
-    BitMask<LANES>: LanesAtMost32,
-{
-    type Output = BitMask<LANES>;
     #[inline]
-    fn bitxor(self, rhs: BitMask<LANES>) -> Self::Output {
-        BitMask::<LANES>::splat(self) ^ rhs
+    pub unsafe fn from_int_unchecked<V>(value: V) -> Self
+    where
+        V: crate::LanesAtMost32,
+    {
+        // TODO remove the transmute when rustc is more flexible
+        assert_eq!(
+            core::mem::size_of::<T::IntBitMask>(),
+            core::mem::size_of::<T::BitMask>()
+        );
+        let mask: T::IntBitMask = crate::intrinsics::simd_bitmask(value);
+        Self(core::mem::transmute_copy(&mask), PhantomData)
     }
-}
 
-impl<const LANES: usize> core::ops::Not for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
-    type Output = BitMask<LANES>;
     #[inline]
-    fn not(self) -> Self::Output {
-        Self(!self.0)
+    pub fn to_bitmask<U: Mask>(self) -> U::BitMask {
+        assert_eq!(
+            core::mem::size_of::<T::BitMask>(),
+            core::mem::size_of::<U::BitMask>()
+        );
+        unsafe { core::mem::transmute_copy(&self.0) }
     }
-}
 
-impl<const LANES: usize> core::ops::BitAndAssign for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
     #[inline]
-    fn bitand_assign(&mut self, rhs: Self) {
-        self.0 &= rhs.0;
+    pub fn any(self) -> bool {
+        self != Self::splat(false)
     }
-}
 
-impl<const LANES: usize> core::ops::BitAndAssign<bool> for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
     #[inline]
-    fn bitand_assign(&mut self, rhs: bool) {
-        *self &= Self::splat(rhs);
+    pub fn all(self) -> bool {
+        self == Self::splat(true)
     }
 }
 
-impl<const LANES: usize> core::ops::BitOrAssign for BitMask<LANES>
+impl<T: Mask, const LANES: usize> core::ops::BitAnd for BitMask<T, LANES>
 where
-    Self: LanesAtMost32,
+    T::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
 {
+    type Output = Self;
     #[inline]
-    fn bitor_assign(&mut self, rhs: Self) {
-        self.0 |= rhs.0;
+    fn bitand(mut self, rhs: Self) -> Self {
+        for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+            *l &= r;
+        }
+        self
     }
 }
 
-impl<const LANES: usize> core::ops::BitOrAssign<bool> for BitMask<LANES>
+impl<T: Mask, const LANES: usize> core::ops::BitOr for BitMask<T, LANES>
 where
-    Self: LanesAtMost32,
+    T::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
 {
+    type Output = Self;
     #[inline]
-    fn bitor_assign(&mut self, rhs: bool) {
-        *self |= Self::splat(rhs);
+    fn bitor(mut self, rhs: Self) -> Self {
+        for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+            *l |= r;
+        }
+        self
     }
 }
 
-impl<const LANES: usize> core::ops::BitXorAssign for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
+impl<T: Mask, const LANES: usize> core::ops::BitXor for BitMask<T, LANES> {
+    type Output = Self;
     #[inline]
-    fn bitxor_assign(&mut self, rhs: Self) {
-        self.0 ^= rhs.0;
+    fn bitxor(mut self, rhs: Self) -> Self::Output {
+        for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+            *l ^= r;
+        }
+        self
     }
 }
 
-impl<const LANES: usize> core::ops::BitXorAssign<bool> for BitMask<LANES>
-where
-    Self: LanesAtMost32,
-{
+impl<T: Mask, const LANES: usize> core::ops::Not for BitMask<T, LANES> {
+    type Output = Self;
     #[inline]
-    fn bitxor_assign(&mut self, rhs: bool) {
-        *self ^= Self::splat(rhs);
+    fn not(mut self) -> Self::Output {
+        for x in self.0.as_mut() {
+            *x = !*x;
+        }
+        if LANES % 8 > 0 {
+            *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+        }
+        self
     }
 }
+
+pub type Mask8<T, const LANES: usize> = BitMask<T, LANES>;
+pub type Mask16<T, const LANES: usize> = BitMask<T, LANES>;
+pub type Mask32<T, const LANES: usize> = BitMask<T, LANES>;
+pub type Mask64<T, const LANES: usize> = BitMask<T, LANES>;
+pub type MaskSize<T, const LANES: usize> = BitMask<T, LANES>;
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index 60a6cb5fdbe..f89bbefba63 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -1,17 +1,7 @@
 //! Masks that take up full SIMD vector registers.
 
-/// The error type returned when converting an integer to a mask fails.
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub struct TryFromMaskError(());
-
-impl core::fmt::Display for TryFromMaskError {
-    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-        write!(
-            f,
-            "mask vector must have all bits set or unset in each lane"
-        )
-    }
-}
+use crate::Mask;
+use core::marker::PhantomData;
 
 macro_rules! define_mask {
     {
@@ -21,18 +11,19 @@ macro_rules! define_mask {
         );
     } => {
         $(#[$attr])*
-        #[derive(Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
         #[repr(transparent)]
-        pub struct $name<const $lanes: usize>(crate::$type<$lanes2>)
+        pub struct $name<T: Mask, const $lanes: usize>(crate::$type<$lanes2>, PhantomData<T>)
         where
             crate::$type<LANES>: crate::LanesAtMost32;
 
-        impl<const LANES: usize> Copy for $name<LANES>
+        impl_full_mask_reductions! { $name, $type }
+
+        impl<T: Mask, const LANES: usize> Copy for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {}
 
-        impl<const LANES: usize> Clone for $name<LANES>
+        impl<T: Mask, const LANES: usize> Clone for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {
@@ -42,383 +33,182 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const LANES: usize> $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            /// Construct a mask by setting all lanes to the given value.
-            pub fn splat(value: bool) -> Self {
-                Self(<crate::$type<LANES>>::splat(
-                    if value {
-                        -1
-                    } else {
-                        0
-                    }
-                ))
-            }
-
-            /// Tests the value of the specified lane.
-            ///
-            /// # Panics
-            /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
-            #[inline]
-            pub fn test(&self, lane: usize) -> bool {
-                assert!(lane < LANES, "lane index out of range");
-                self.0[lane] == -1
-            }
-
-            /// Sets the value of the specified lane.
-            ///
-            /// # Panics
-            /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
-            #[inline]
-            pub fn set(&mut self, lane: usize, value: bool) {
-                assert!(lane < LANES, "lane index out of range");
-                self.0[lane] = if value {
-                    -1
-                } else {
-                    0
-                }
-            }
-
-            /// Converts the mask to the equivalent integer representation, where -1 represents
-            /// "set" and 0 represents "unset".
-            #[inline]
-            pub fn to_int(self) -> crate::$type<LANES> {
-                self.0
-            }
-
-            /// Creates a  mask from the equivalent integer representation, where -1 represents
-            /// "set" and 0 represents "unset".
-            ///
-            /// Each provided lane must be either 0 or -1.
-            #[inline]
-            pub unsafe fn from_int_unchecked(value: crate::$type<LANES>) -> Self {
-                Self(value)
-            }
-
-            /// Creates a mask from the equivalent integer representation, where -1 represents
-            /// "set" and 0 represents "unset".
-            ///
-            /// # Panics
-            /// Panics if any lane is not 0 or -1.
-            #[inline]
-            pub fn from_int(value: crate::$type<LANES>) -> Self {
-                use core::convert::TryInto;
-                value.try_into().unwrap()
-            }
-        }
-
-        impl<const LANES: usize> core::convert::From<bool> for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            fn from(value: bool) -> Self {
-                Self::splat(value)
-            }
-        }
-
-        impl<const LANES: usize> core::convert::TryFrom<crate::$type<LANES>> for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            type Error = TryFromMaskError;
-            fn try_from(value: crate::$type<LANES>) -> Result<Self, Self::Error> {
-                let valid = (value.lanes_eq(crate::$type::<LANES>::splat(0)) | value.lanes_eq(crate::$type::<LANES>::splat(-1))).all();
-                if valid {
-                    Ok(Self(value))
-                } else {
-                    Err(TryFromMaskError(()))
-                }
-            }
-        }
-
-        impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::$type<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            fn from(value: $name<LANES>) -> Self {
-                value.0
-            }
-        }
-
-        impl<const LANES: usize> core::convert::From<crate::BitMask<LANES>> for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-            crate::BitMask<LANES>: crate::LanesAtMost32,
-        {
-            fn from(value: crate::BitMask<LANES>) -> Self {
-                // TODO use an intrinsic to do this efficiently (with LLVM's sext instruction)
-                let mut mask = Self::splat(false);
-                for lane in 0..LANES {
-                    mask.set(lane, value.test(lane));
-                }
-                mask
-            }
-        }
-
-        impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::BitMask<LANES>
+        impl<T: Mask, const LANES: usize> PartialEq for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
-            crate::BitMask<LANES>: crate::LanesAtMost32,
         {
-            fn from(value: $name<$lanes>) -> Self {
-                // TODO use an intrinsic to do this efficiently (with LLVM's trunc instruction)
-                let mut mask = Self::splat(false);
-                for lane in 0..LANES {
-                    mask.set(lane, value.test(lane));
-                }
-                mask
+            fn eq(&self, other: &Self) -> bool {
+                self.0 == other.0
             }
         }
 
-        impl<const LANES: usize> core::fmt::Debug for $name<LANES>
+        impl<T: Mask, const LANES: usize> PartialOrd for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {
-            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                f.debug_list()
-                    .entries((0..LANES).map(|lane| self.test(lane)))
-                    .finish()
+            fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+                self.0.partial_cmp(&other.0)
             }
         }
 
-        impl<const LANES: usize> core::fmt::Binary for $name<LANES>
+        impl<T: Mask, const LANES: usize> Eq for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                core::fmt::Binary::fmt(&self.0, f)
-            }
-        }
-
-        impl<const LANES: usize> core::fmt::Octal for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                core::fmt::Octal::fmt(&self.0, f)
-            }
-        }
+        {}
 
-        impl<const LANES: usize> core::fmt::LowerHex for $name<LANES>
+        impl<T: Mask, const LANES: usize> Ord for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {
-            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                core::fmt::LowerHex::fmt(&self.0, f)
+            fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+                self.0.cmp(&other.0)
             }
         }
 
-        impl<const LANES: usize> core::fmt::UpperHex for $name<LANES>
+        impl<T: Mask, const LANES: usize> $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {
-            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                core::fmt::UpperHex::fmt(&self.0, f)
+            pub fn splat(value: bool) -> Self {
+                Self(
+                    <crate::$type<LANES>>::splat(
+                        if value {
+                            -1
+                        } else {
+                            0
+                        }
+                    ),
+                    PhantomData,
+                )
             }
-        }
 
-        impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            type Output = Self;
             #[inline]
-            fn bitand(self, rhs: Self) -> Self {
-                Self(self.0 & rhs.0)
+            pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+                self.0[lane] == -1
             }
-        }
 
-        impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            type Output = Self;
             #[inline]
-            fn bitand(self, rhs: bool) -> Self {
-                self & Self::splat(rhs)
+            pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+                self.0[lane] = if value {
+                    -1
+                } else {
+                    0
+                }
             }
-        }
 
-        impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            type Output = $name<LANES>;
             #[inline]
-            fn bitand(self, rhs: $name<LANES>) -> $name<LANES> {
-                $name::<LANES>::splat(self) & rhs
+            pub fn to_int(self) -> crate::$type<LANES> {
+                self.0
             }
-        }
 
-        impl<const LANES: usize> core::ops::BitOr for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            type Output = Self;
             #[inline]
-            fn bitor(self, rhs: Self) -> Self {
-                Self(self.0 | rhs.0)
+            pub unsafe fn from_int_unchecked(value: crate::$type<LANES>) -> Self {
+                Self(value, PhantomData)
             }
-        }
 
-        impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            type Output = Self;
             #[inline]
-            fn bitor(self, rhs: bool) -> Self {
-                self | Self::splat(rhs)
+            pub fn to_bitmask<U: crate::Mask>(self) -> U::BitMask {
+                unsafe {
+                    // TODO remove the transmute when rustc is more flexible
+                    assert_eq!(core::mem::size_of::<U::IntBitMask>(), core::mem::size_of::<U::BitMask>());
+                    let mask: U::IntBitMask = crate::intrinsics::simd_bitmask(self.0);
+                    let mut bitmask: U::BitMask = core::mem::transmute_copy(&mask);
+
+                    // There is a bug where LLVM appears to implement this operation with the wrong
+                    // bit order.
+                    // TODO fix this in a better way
+                    if cfg!(any(target_arch = "mips", target_arch = "mips64")) {
+                        for x in bitmask.as_mut() {
+                            *x = x.reverse_bits();
+                        }
+                    }
+
+                    bitmask
+                }
             }
         }
 
-        impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
+        impl<T: Mask, const LANES: usize> core::convert::From<$name<T, LANES>> for crate::$type<LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {
-            type Output = $name<LANES>;
-            #[inline]
-            fn bitor(self, rhs: $name<LANES>) -> $name<LANES> {
-                $name::<LANES>::splat(self) | rhs
+            fn from(value: $name<T, LANES>) -> Self {
+                value.0
             }
         }
 
-        impl<const LANES: usize> core::ops::BitXor for $name<LANES>
+        impl<T: Mask, const LANES: usize> core::ops::BitAnd for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
-            fn bitxor(self, rhs: Self) -> Self::Output {
-                Self(self.0 ^ rhs.0)
+            fn bitand(self, rhs: Self) -> Self {
+                Self(self.0 & rhs.0, PhantomData)
             }
         }
 
-        impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
+        impl<T: Mask, const LANES: usize> core::ops::BitOr for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
-            fn bitxor(self, rhs: bool) -> Self::Output {
-                self ^ Self::splat(rhs)
+            fn bitor(self, rhs: Self) -> Self {
+                Self(self.0 | rhs.0, PhantomData)
             }
         }
 
-        impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
+        impl<T: Mask, const LANES: usize> core::ops::BitXor for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {
-            type Output = $name<LANES>;
+            type Output = Self;
             #[inline]
-            fn bitxor(self, rhs: $name<LANES>) -> Self::Output {
-                $name::<LANES>::splat(self) ^ rhs
+            fn bitxor(self, rhs: Self) -> Self::Output {
+                Self(self.0 ^ rhs.0, PhantomData)
             }
         }
 
-        impl<const LANES: usize> core::ops::Not for $name<LANES>
+        impl<T: Mask, const LANES: usize> core::ops::Not for $name<T, LANES>
         where
             crate::$type<LANES>: crate::LanesAtMost32,
         {
-            type Output = $name<LANES>;
+            type Output = Self;
             #[inline]
             fn not(self) -> Self::Output {
-                Self(!self.0)
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            #[inline]
-            fn bitand_assign(&mut self, rhs: Self) {
-                self.0 &= rhs.0;
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            #[inline]
-            fn bitand_assign(&mut self, rhs: bool) {
-                *self &= Self::splat(rhs);
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            #[inline]
-            fn bitor_assign(&mut self, rhs: Self) {
-                self.0 |= rhs.0;
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            #[inline]
-            fn bitor_assign(&mut self, rhs: bool) {
-                *self |= Self::splat(rhs);
+                Self(!self.0, PhantomData)
             }
         }
-
-        impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            #[inline]
-            fn bitxor_assign(&mut self, rhs: Self) {
-                self.0 ^= rhs.0;
-            }
-        }
-
-        impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
-        where
-            crate::$type<LANES>: crate::LanesAtMost32,
-        {
-            #[inline]
-            fn bitxor_assign(&mut self, rhs: bool) {
-                *self ^= Self::splat(rhs);
-            }
-        }
-
-        impl_full_mask_reductions! { $name, $type }
     }
 }
 
 define_mask! {
     /// A mask equivalent to [SimdI8](crate::SimdI8), where all bits in the lane must be either set
     /// or unset.
-    struct SimdMask8<const LANES: usize>(crate::SimdI8<LANES>);
+    struct Mask8<const LANES: usize>(crate::SimdI8<LANES>);
 }
 
 define_mask! {
     /// A mask equivalent to [SimdI16](crate::SimdI16), where all bits in the lane must be either set
     /// or unset.
-    struct SimdMask16<const LANES: usize>(crate::SimdI16<LANES>);
+    struct Mask16<const LANES: usize>(crate::SimdI16<LANES>);
 }
 
 define_mask! {
     /// A mask equivalent to [SimdI32](crate::SimdI32), where all bits in the lane must be either set
     /// or unset.
-    struct SimdMask32<const LANES: usize>(crate::SimdI32<LANES>);
+    struct Mask32<const LANES: usize>(crate::SimdI32<LANES>);
 }
 
 define_mask! {
     /// A mask equivalent to [SimdI64](crate::SimdI64), where all bits in the lane must be either set
     /// or unset.
-    struct SimdMask64<const LANES: usize>(crate::SimdI64<LANES>);
+    struct Mask64<const LANES: usize>(crate::SimdI64<LANES>);
 }
 
 define_mask! {
     /// A mask equivalent to [SimdIsize](crate::SimdIsize), where all bits in the lane must be either set
     /// or unset.
-    struct SimdMaskSize<const LANES: usize>(crate::SimdIsize<LANES>);
+    struct MaskSize<const LANES: usize>(crate::SimdIsize<LANES>);
 }
diff --git a/crates/core_simd/src/masks/mod.rs b/crates/core_simd/src/masks/mod.rs
index c394c7003a3..deaf2be5dca 100644
--- a/crates/core_simd/src/masks/mod.rs
+++ b/crates/core_simd/src/masks/mod.rs
@@ -1,33 +1,86 @@
 //! Types and traits associated with masking lanes of vectors.
+//! Provides the sealed `Mask` helper trait and the opaque mask types `Mask8` through `MaskSize`.
 #![allow(non_camel_case_types)]
 
-mod full_masks;
-pub use full_masks::*;
-
-mod bitmask;
-pub use bitmask::*;
+#[cfg_attr(
+    not(all(target_arch = "x86_64", target_feature = "avx512f")),
+    path = "full_masks.rs"
+)]
+#[cfg_attr(
+    all(target_arch = "x86_64", target_feature = "avx512f"),
+    path = "bitmask.rs"
+)]
+mod mask_impl;
 
 use crate::{LanesAtMost32, SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
 
+mod sealed {
+    pub trait Sealed {} // supertrait of `Mask`; `sealed` is a private module, so code outside it cannot name this trait
+}
+
+/// Helper trait for mask types; it requires `sealed::Sealed` and names the bitmask types used by `to_bitmask`.
+pub trait Mask: sealed::Sealed {
+    /// The bitmask representation of a mask: a byte array in which each lane maps to one bit.
+    type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
+
+    // TODO remove this when rustc intrinsics are more flexible (integer of the same size as `BitMask`, used as the `simd_bitmask` result type)
+    #[doc(hidden)]
+    type IntBitMask;
+}
+
 macro_rules! define_opaque_mask {
     {
         $(#[$attr:meta])*
-        struct $name:ident<const $lanes:ident: usize>($inner_ty:ident<$lanes2:ident>);
+        struct $name:ident<const $lanes:ident: usize>($inner_ty:ty);
         @bits $bits_ty:ident
     } => {
         $(#[$attr])*
         #[allow(non_camel_case_types)]
-        pub struct $name<const LANES: usize>($inner_ty<LANES>) where $bits_ty<LANES>: LanesAtMost32;
+        pub struct $name<const LANES: usize>($inner_ty)
+        where
+            $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask;
+
+        impl<const LANES: usize> sealed::Sealed for $name<LANES>
+        where
+            $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
+        {}
+        impl Mask for $name<1> {
+            type BitMask = [u8; 1];
+            type IntBitMask = u8;
+        }
+        impl Mask for $name<2> {
+            type BitMask = [u8; 1];
+            type IntBitMask = u8;
+        }
+        impl Mask for $name<4> {
+            type BitMask = [u8; 1];
+            type IntBitMask = u8;
+        }
+        impl Mask for $name<8> {
+            type BitMask = [u8; 1];
+            type IntBitMask = u8;
+        }
+        impl Mask for $name<16> {
+            type BitMask = [u8; 2];
+            type IntBitMask = u16;
+        }
+        impl Mask for $name<32> {
+            type BitMask = [u8; 4];
+            type IntBitMask = u32;
+        }
 
-        impl_opaque_mask_reductions! { $name, $inner_ty, $bits_ty }
+        impl_opaque_mask_reductions! { $name, $bits_ty }
 
         impl<const LANES: usize> $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32
+            $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             /// Construct a mask by setting all lanes to the given value.
             pub fn splat(value: bool) -> Self {
-                Self(<$inner_ty<LANES>>::splat(value))
+                Self(<$inner_ty>::splat(value))
             }
 
             /// Converts an array to a SIMD vector.
@@ -52,13 +105,63 @@ macro_rules! define_opaque_mask {
                 array
             }
 
+            /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+            /// represents `true`.
+            ///
+            /// # Safety
+            /// All lanes must be either 0 or -1.
+            #[inline]
+            pub unsafe fn from_int_unchecked(value: $bits_ty<LANES>) -> Self {
+                Self(<$inner_ty>::from_int_unchecked(value))
+            }
+
+            /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+            /// represents `true`.
+            ///
+            /// # Panics
+            /// Panics if any lane is not 0 or -1.
+            #[inline]
+            pub fn from_int(value: $bits_ty<LANES>) -> Self {
+                assert!(
+                    (value.lanes_eq($bits_ty::splat(0)) | value.lanes_eq($bits_ty::splat(-1))).all(),
+                    "all values must be either 0 or -1",
+                );
+                unsafe { Self::from_int_unchecked(value) }
+            }
+
+            /// Converts the mask to a vector of integers, where 0 represents `false` and -1
+            /// represents `true`.
+            #[inline]
+            pub fn to_int(self) -> $bits_ty<LANES> {
+                self.0.to_int()
+            }
+
+            /// Tests the value of the specified lane.
+            ///
+            /// # Safety
+            /// `lane` must be less than `LANES`.
+            #[inline]
+            pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+                self.0.test_unchecked(lane)
+            }
+
             /// Tests the value of the specified lane.
             ///
             /// # Panics
             /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
             #[inline]
             pub fn test(&self, lane: usize) -> bool {
-                self.0.test(lane)
+                assert!(lane < LANES, "lane index out of range");
+                unsafe { self.test_unchecked(lane) }
+            }
+
+            /// Sets the value of the specified lane.
+            ///
+            /// # Safety
+            /// `lane` must be less than `LANES`.
+            #[inline]
+            pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+                self.0.set_unchecked(lane, value);
             }
 
             /// Sets the value of the specified lane.
@@ -67,52 +170,21 @@ macro_rules! define_opaque_mask {
             /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
             #[inline]
             pub fn set(&mut self, lane: usize, value: bool) {
-                self.0.set(lane, value);
-            }
-        }
-
-        impl<const LANES: usize> From<BitMask<LANES>> for $name<LANES>
-        where
-            $bits_ty<LANES>: LanesAtMost32,
-            BitMask<LANES>: LanesAtMost32,
-        {
-            fn from(value: BitMask<LANES>) -> Self {
-                Self(value.into())
-            }
-        }
-
-        impl<const LANES: usize> From<$name<LANES>> for crate::BitMask<LANES>
-        where
-            $bits_ty<LANES>: LanesAtMost32,
-            BitMask<LANES>: LanesAtMost32,
-        {
-            fn from(value: $name<LANES>) -> Self {
-                value.0.into()
-            }
-        }
-
-        impl<const LANES: usize> From<$inner_ty<LANES>> for $name<LANES>
-        where
-            $bits_ty<LANES>: LanesAtMost32,
-        {
-            fn from(value: $inner_ty<LANES>) -> Self {
-                Self(value)
+                assert!(lane < LANES, "lane index out of range");
+                unsafe { self.set_unchecked(lane, value); }
             }
-        }
 
-        impl<const LANES: usize> From<$name<LANES>> for $inner_ty<LANES>
-        where
-            $bits_ty<LANES>: LanesAtMost32,
-        {
-            fn from(value: $name<LANES>) -> Self {
-                value.0
+            /// Convert this mask to a bitmask, with one bit set per lane.
+            pub fn to_bitmask(self) -> <Self as Mask>::BitMask {
+                self.0.to_bitmask::<Self>()
             }
         }
 
         // vector/array conversion
         impl<const LANES: usize> From<[bool; LANES]> for $name<LANES>
         where
-            $bits_ty<LANES>: crate::LanesAtMost32
+            $bits_ty<LANES>: crate::LanesAtMost32,
+            Self: Mask,
         {
             fn from(array: [bool; LANES]) -> Self {
                 Self::from_array(array)
@@ -121,7 +193,8 @@ macro_rules! define_opaque_mask {
 
         impl <const LANES: usize> From<$name<LANES>> for [bool; LANES]
         where
-            $bits_ty<LANES>: crate::LanesAtMost32
+            $bits_ty<LANES>: crate::LanesAtMost32,
+            $name<LANES>: Mask,
         {
             fn from(vector: $name<LANES>) -> Self {
                 vector.to_array()
@@ -130,13 +203,14 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> Copy for $name<LANES>
         where
-            $inner_ty<LANES>: Copy,
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {}
 
         impl<const LANES: usize> Clone for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn clone(&self) -> Self {
@@ -147,6 +221,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> Default for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn default() -> Self {
@@ -157,6 +232,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> PartialEq for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn eq(&self, other: &Self) -> bool {
@@ -167,6 +243,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> PartialOrd for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
@@ -176,16 +253,20 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::fmt::Debug for $name<LANES>
         where
-            $bits_ty<LANES>: LanesAtMost32,
+            $bits_ty<LANES>: crate::LanesAtMost32,
+            Self: Mask,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-                core::fmt::Debug::fmt(&self.0, f)
+                f.debug_list()
+                    .entries((0..LANES).map(|lane| self.test(lane)))
+                    .finish()
             }
         }
 
         impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             type Output = Self;
             #[inline]
@@ -197,6 +278,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             type Output = Self;
             #[inline]
@@ -208,6 +290,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
         where
             $bits_ty<LANES>: LanesAtMost32,
+            $name<LANES>: Mask,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -219,6 +302,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitOr for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             type Output = Self;
             #[inline]
@@ -230,6 +314,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             type Output = Self;
             #[inline]
@@ -241,6 +326,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
         where
             $bits_ty<LANES>: LanesAtMost32,
+            $name<LANES>: Mask,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -252,6 +338,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitXor for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             type Output = Self;
             #[inline]
@@ -263,6 +350,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             type Output = Self;
             #[inline]
@@ -274,6 +362,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
         where
             $bits_ty<LANES>: LanesAtMost32,
+            $name<LANES>: Mask,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -285,6 +374,7 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::Not for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -296,16 +386,18 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: Self) {
-                self.0 &= rhs.0;
+                self.0 = self.0 & rhs.0;
             }
         }
 
         impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: bool) {
@@ -316,16 +408,18 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: Self) {
-                self.0 |= rhs.0;
+                self.0 = self.0 | rhs.0;
             }
         }
 
         impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: bool) {
@@ -336,16 +430,18 @@ macro_rules! define_opaque_mask {
         impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: Self) {
-                self.0 ^= rhs.0;
+                self.0 = self.0 ^ rhs.0;
             }
         }
 
         impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
         where
             $bits_ty<LANES>: LanesAtMost32,
+            Self: Mask,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: bool) {
@@ -359,7 +455,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` 8-bit elements.
     ///
     /// The layout of this type is unspecified.
-    struct Mask8<const LANES: usize>(SimdMask8<LANES>);
+    struct Mask8<const LANES: usize>(mask_impl::Mask8<Self, LANES>);
     @bits SimdI8
 }
 
@@ -367,7 +463,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` 16-bit elements.
     ///
     /// The layout of this type is unspecified.
-    struct Mask16<const LANES: usize>(SimdMask16<LANES>);
+    struct Mask16<const LANES: usize>(mask_impl::Mask16<Self, LANES>);
     @bits SimdI16
 }
 
@@ -375,7 +471,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` 32-bit elements.
     ///
     /// The layout of this type is unspecified.
-    struct Mask32<const LANES: usize>(SimdMask32<LANES>);
+    struct Mask32<const LANES: usize>(mask_impl::Mask32<Self, LANES>);
     @bits SimdI32
 }
 
@@ -383,7 +479,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` 64-bit elements.
     ///
     /// The layout of this type is unspecified.
-    struct Mask64<const LANES: usize>(SimdMask64<LANES>);
+    struct Mask64<const LANES: usize>(mask_impl::Mask64<Self, LANES>);
     @bits SimdI64
 }
 
@@ -391,7 +487,7 @@ define_opaque_mask! {
     /// Mask for vectors with `LANES` pointer-width elements.
     ///
     /// The layout of this type is unspecified.
-    struct MaskSize<const LANES: usize>(SimdMaskSize<LANES>);
+    struct MaskSize<const LANES: usize>(mask_impl::MaskSize<Self, LANES>);
     @bits SimdIsize
 }
 
diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index 382d366dd3d..8687d1af516 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -103,18 +103,16 @@ macro_rules! impl_float_reductions {
 }
 
 macro_rules! impl_full_mask_reductions {
-    { $name:ident, $inner:ident } => {
-        impl<const LANES: usize> crate::$name<LANES>
+    { $name:ident, $bits_ty:ident } => {
+        impl<T: crate::Mask, const LANES: usize> $name<T, LANES>
         where
-            crate::$inner<LANES>: crate::LanesAtMost32
+            crate::$bits_ty<LANES>: crate::LanesAtMost32
         {
-            /// Returns true if any lane is set, or false otherwise.
             #[inline]
             pub fn any(self) -> bool {
                 unsafe { crate::intrinsics::simd_reduce_any(self.to_int()) }
             }
 
-            /// Returns true if all lanes are set, or false otherwise.
             #[inline]
             pub fn all(self) -> bool {
                 unsafe { crate::intrinsics::simd_reduce_all(self.to_int()) }
@@ -124,10 +122,11 @@ macro_rules! impl_full_mask_reductions {
 }
 
 macro_rules! impl_opaque_mask_reductions {
-    { $name:ident, $inner:ident, $bits_ty:ident } => {
+    { $name:ident, $bits_ty:ident } => {
         impl<const LANES: usize> $name<LANES>
         where
-            $bits_ty<LANES>: crate::LanesAtMost32
+            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            $name<LANES>: crate::Mask,
         {
             /// Returns true if any lane is set, or false otherwise.
             #[inline]
@@ -143,20 +142,3 @@ macro_rules! impl_opaque_mask_reductions {
         }
     }
 }
-
-impl<const LANES: usize> crate::BitMask<LANES>
-where
-    crate::BitMask<LANES>: crate::LanesAtMost32,
-{
-    /// Returns true if any lane is set, or false otherwise.
-    #[inline]
-    pub fn any(self) -> bool {
-        self != Self::splat(false)
-    }
-
-    /// Returns true if all lanes are set, or false otherwise.
-    #[inline]
-    pub fn all(self) -> bool {
-        self == Self::splat(true)
-    }
-}
diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs
index 47013053ae1..6371f88a40a 100644
--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@@ -42,6 +42,7 @@ macro_rules! impl_float_vector {
             Self: crate::LanesAtMost32,
             crate::$bits_ty<LANES>: crate::LanesAtMost32,
             crate::$mask_impl_ty<LANES>: crate::LanesAtMost32,
+            crate::$mask_ty<LANES>: crate::Mask,
         {
             /// Returns true for each lane if it has a positive sign, including
             /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs
index 30b09a229e9..a535fad7bc1 100644
--- a/crates/core_simd/src/vector/int.rs
+++ b/crates/core_simd/src/vector/int.rs
@@ -30,6 +30,7 @@ macro_rules! impl_integer_vector {
         where
             Self: crate::LanesAtMost32,
             crate::$mask_impl_ty<LANES>: crate::LanesAtMost32,
+            crate::$mask_ty<LANES>: crate::Mask,
         {
             /// Returns true for each positive lane and false if it is zero or negative.
             pub fn is_positive(self) -> crate::$mask_ty<LANES> {
diff --git a/crates/core_simd/src/vector/uint.rs b/crates/core_simd/src/vector/uint.rs
index 53e780520a7..db027b0941f 100644
--- a/crates/core_simd/src/vector/uint.rs
+++ b/crates/core_simd/src/vector/uint.rs
@@ -1,6 +1,5 @@
 #![allow(non_camel_case_types)]
 
-
 /// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
 macro_rules! impl_unsigned_vector {
     { $name:ident, $type:ty } => {
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 59da77de622..7021d58aa54 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -1,30 +1,9 @@
-use core::convert::TryFrom;
-use core_simd::{BitMask, Mask8, SimdI8, SimdMask8};
-
 #[cfg(target_arch = "wasm32")]
 use wasm_bindgen_test::*;
 
 #[cfg(target_arch = "wasm32")]
 wasm_bindgen_test_configure!(run_in_browser);
 
-#[test]
-#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
-fn mask_format_round_trip() {
-    let ints = SimdI8::from_array([-1, 0, 0, -1]);
-
-    let simd_mask = SimdMask8::try_from(ints).unwrap();
-
-    let bitmask = BitMask::from(simd_mask);
-
-    let opaque_mask = Mask8::from(bitmask);
-
-    let simd_mask_returned = SimdMask8::from(opaque_mask);
-
-    let ints_returned = SimdI8::from(simd_mask_returned);
-
-    assert_eq!(ints_returned, ints);
-}
-
 macro_rules! test_mask_api {
     { $name:ident } => {
         #[allow(non_snake_case)]
@@ -77,12 +56,29 @@ macro_rules! test_mask_api {
                 v.set(2, true);
                 assert!(!v.all());
             }
+
+            #[test]
+            fn roundtrip_int_conversion() {
+                let values = [true, false, false, true, false, false, true, false];
+                let mask = core_simd::$name::<8>::from_array(values);
+                let int = mask.to_int();
+                assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]); // `true` lanes become -1, `false` lanes become 0
+                assert_eq!(core_simd::$name::<8>::from_int(int), mask); // converting back must reproduce the original mask
+            }
+
+            #[test]
+            fn to_bitmask() {
+                let values = [
+                    true, false, false, true, false, false, true, false, // lanes 0-7: expected in byte 0, lane 0 as the least significant bit
+                    true, true, false, false, false, false, false, true, // lanes 8-15: expected in byte 1, lane 8 as the least significant bit
+                ];
+                let mask = core_simd::$name::<16>::from_array(values);
+                assert_eq!(mask.to_bitmask(), [0b01001001, 0b10000011]);
+            }
         }
     }
 }
 
 mod mask_api {
     test_mask_api! { Mask8 }
-    test_mask_api! { SimdMask8 }
-    test_mask_api! { BitMask }
 }
diff --git a/crates/test_helpers/src/array.rs b/crates/test_helpers/src/array.rs
index c64bfee4f2d..5ffc9226976 100644
--- a/crates/test_helpers/src/array.rs
+++ b/crates/test_helpers/src/array.rs
@@ -3,14 +3,11 @@
 // Adapted from proptest's array code
 // Copyright 2017 Jason Lingle
 
+use core::{marker::PhantomData, mem::MaybeUninit};
 use proptest::{
     strategy::{NewTree, Strategy, ValueTree},
     test_runner::TestRunner,
 };
-use core::{
-    marker::PhantomData,
-    mem::MaybeUninit,
-};
 
 #[must_use = "strategies do nothing unless used"]
 #[derive(Clone, Copy, Debug)]
diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs
index 9e8790842b4..fffd088f4da 100644
--- a/crates/test_helpers/src/lib.rs
+++ b/crates/test_helpers/src/lib.rs
@@ -281,7 +281,11 @@ macro_rules! test_lanes {
                     core_simd::SimdIsize<$lanes>: core_simd::LanesAtMost32,
                     core_simd::SimdF32<$lanes>: core_simd::LanesAtMost32,
                     core_simd::SimdF64<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::BitMask<$lanes>: core_simd::LanesAtMost32,
+                    core_simd::Mask8<$lanes>: core_simd::Mask,
+                    core_simd::Mask16<$lanes>: core_simd::Mask,
+                    core_simd::Mask32<$lanes>: core_simd::Mask,
+                    core_simd::Mask64<$lanes>: core_simd::Mask,
+                    core_simd::MaskSize<$lanes>: core_simd::Mask,
                 $body
 
                 #[cfg(target_arch = "wasm32")]
@@ -351,7 +355,11 @@ macro_rules! test_lanes_panic {
                     core_simd::SimdIsize<$lanes>: core_simd::LanesAtMost32,
                     core_simd::SimdF32<$lanes>: core_simd::LanesAtMost32,
                     core_simd::SimdF64<$lanes>: core_simd::LanesAtMost32,
-                    core_simd::BitMask<$lanes>: core_simd::LanesAtMost32,
+                    core_simd::Mask8<$lanes>: core_simd::Mask,
+                    core_simd::Mask16<$lanes>: core_simd::Mask,
+                    core_simd::Mask32<$lanes>: core_simd::Mask,
+                    core_simd::Mask64<$lanes>: core_simd::Mask,
+                    core_simd::MaskSize<$lanes>: core_simd::Mask,
                 $body
 
                 #[test]