about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jubilee <46493976+workingjubilee@users.noreply.github.com> 2021-04-22 16:44:41 -0700
committer GitHub <noreply@github.com> 2021-04-22 16:44:41 -0700
commit 24ebae870e11ed60a83ca0acccc202387f95f25f (patch)
tree 20c0067e7ed6ff8f8f8134e23c80c563eed870c6
parent 2fa62b91c81b10064bb298befa1f539d1ea28a08 (diff)
parent 04ee1073237dc77b3742e7a1c0d3740c1df499c4 (diff)
download rust-24ebae870e11ed60a83ca0acccc202387f95f25f.tar.gz
         rust-24ebae870e11ed60a83ca0acccc202387f95f25f.zip
Merge pull request #83 from rust-lang/feature/reductions
Add reductions
-rw-r--r-- crates/core_simd/src/intrinsics.rs 11
-rw-r--r-- crates/core_simd/src/lib.rs 2
-rw-r--r-- crates/core_simd/src/masks/bitmask.rs 4
-rw-r--r-- crates/core_simd/src/masks/full_masks.rs 128
-rw-r--r-- crates/core_simd/src/masks/mod.rs 128
-rw-r--r-- crates/core_simd/src/reduction.rs 162
-rw-r--r-- crates/core_simd/src/vector/float.rs 1
-rw-r--r-- crates/core_simd/src/vector/int.rs 1
-rw-r--r-- crates/core_simd/src/vector/uint.rs 1
-rw-r--r-- crates/core_simd/tests/masks.rs 18
-rw-r--r-- crates/core_simd/tests/ops_macros.rs 125
-rw-r--r-- crates/test_helpers/src/biteq.rs 2
12 files changed, 464 insertions, 119 deletions
diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index b85a3ad9922..57e666873c1 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -73,4 +73,15 @@ extern "platform-intrinsic" {
 
     // {s,u}sub.sat
     pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
+
+    // reductions
+    pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+    pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
+    pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
+    pub(crate) fn simd_reduce_any<T>(x: T) -> bool;
+    pub(crate) fn simd_reduce_max<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_min<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
+    pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
 }
diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index 906ee3f06ae..0fc2641516d 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -11,6 +11,8 @@ mod first;
 mod permute;
 #[macro_use]
 mod transmute;
+#[macro_use]
+mod reduction;
 
 mod comparisons;
 mod fmt;
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index d7400699fde..b4d1b6d9557 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -1,7 +1,7 @@
 use crate::LanesAtMost32;
 
 /// A mask where each lane is represented by a single bit.
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Hash)]
 #[repr(transparent)]
 pub struct BitMask<const LANES: usize>(u64)
 where
@@ -14,7 +14,7 @@ where
     /// Construct a mask by setting all lanes to the given value.
     pub fn splat(value: bool) -> Self {
         if value {
-            Self(u64::MAX)
+            Self(u64::MAX >> (64 - LANES))
         } else {
             Self(u64::MIN)
         }
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index cca077b14d0..a6689ce48c6 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -14,22 +14,27 @@ impl core::fmt::Display for TryFromMaskError {
 }
 
 macro_rules! define_mask {
-    { $(#[$attr:meta])* struct $name:ident<const $lanes:ident: usize>($type:ty); } => {
+    {
+        $(#[$attr:meta])*
+        struct $name:ident<const $lanes:ident: usize>(
+            crate::$type:ident<$lanes2:ident>
+        );
+    } => {
         $(#[$attr])*
         #[derive(Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
         #[repr(transparent)]
-        pub struct $name<const $lanes: usize>($type)
+        pub struct $name<const $lanes: usize>(crate::$type<$lanes2>)
         where
-            $type: crate::LanesAtMost32;
+            crate::$type<LANES>: crate::LanesAtMost32;
 
         impl<const LANES: usize> Copy for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {}
 
         impl<const LANES: usize> Clone for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn clone(&self) -> Self {
@@ -37,13 +42,13 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> $name<$lanes>
+        impl<const LANES: usize> $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             /// Construct a mask by setting all lanes to the given value.
             pub fn splat(value: bool) -> Self {
-                Self(<$type>::splat(
+                Self(<crate::$type<LANES>>::splat(
                     if value {
                         -1
                     } else {
@@ -76,42 +81,51 @@ macro_rules! define_mask {
                 }
             }
 
-            /// Creates a mask from an integer vector.
+            /// Converts the mask to the equivalent integer representation, where -1 represents
+            /// "set" and 0 represents "unset".
+            #[inline]
+            pub fn to_int(self) -> crate::$type<LANES> {
+                self.0
+            }
+
+            /// Creates a mask from the equivalent integer representation, where -1 represents
+            /// "set" and 0 represents "unset".
             ///
-            /// # Safety
-            /// All lanes must be either 0 or -1.
+            /// Each provided lane must be either 0 or -1.
             #[inline]
-            pub unsafe fn from_int_unchecked(value: $type) -> Self {
+            pub unsafe fn from_int_unchecked(value: crate::$type<LANES>) -> Self {
                 Self(value)
             }
 
-            /// Creates a mask from an integer vector.
+            /// Creates a mask from the equivalent integer representation, where -1 represents
+            /// "set" and 0 represents "unset".
             ///
             /// # Panics
             /// Panics if any lane is not 0 or -1.
             #[inline]
-            pub fn from_int(value: $type) -> Self {
+            pub fn from_int(value: crate::$type<LANES>) -> Self {
                 use core::convert::TryInto;
                 value.try_into().unwrap()
             }
         }
 
-        impl<const $lanes: usize> core::convert::From<bool> for $name<$lanes>
+        impl<const LANES: usize> core::convert::From<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn from(value: bool) -> Self {
                 Self::splat(value)
             }
         }
 
-        impl<const $lanes: usize> core::convert::TryFrom<$type> for $name<$lanes>
+        impl<const LANES: usize> core::convert::TryFrom<crate::$type<LANES>> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Error = TryFromMaskError;
-            fn try_from(value: $type) -> Result<Self, Self::Error> {
-                if value.as_slice().iter().all(|x| *x == 0 || *x == -1) {
+            fn try_from(value: crate::$type<LANES>) -> Result<Self, Self::Error> {
+                let valid = (value.lanes_eq(crate::$type::<LANES>::splat(0)) | value.lanes_eq(crate::$type::<LANES>::splat(-1))).all();
+                if valid {
                     Ok(Self(value))
                 } else {
                     Err(TryFromMaskError(()))
@@ -119,21 +133,21 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> core::convert::From<$name<$lanes>> for $type
+        impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::$type<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
-            fn from(value: $name<$lanes>) -> Self {
+            fn from(value: $name<LANES>) -> Self {
                 value.0
             }
         }
 
-        impl<const $lanes: usize> core::convert::From<crate::BitMask<$lanes>> for $name<$lanes>
+        impl<const LANES: usize> core::convert::From<crate::BitMask<LANES>> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
-            crate::BitMask<$lanes>: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::BitMask<LANES>: crate::LanesAtMost32,
         {
-            fn from(value: crate::BitMask<$lanes>) -> Self {
+            fn from(value: crate::BitMask<LANES>) -> Self {
                 // TODO use an intrinsic to do this efficiently (with LLVM's sext instruction)
                 let mut mask = Self::splat(false);
                 for lane in 0..LANES {
@@ -143,10 +157,10 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> core::convert::From<$name<$lanes>> for crate::BitMask<$lanes>
+        impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::BitMask<LANES>
         where
-            $type: crate::LanesAtMost32,
-            crate::BitMask<$lanes>: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
+            crate::BitMask<LANES>: crate::LanesAtMost32,
         {
             fn from(value: $name<$lanes>) -> Self {
                 // TODO use an intrinsic to do this efficiently (with LLVM's trunc instruction)
@@ -158,9 +172,9 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> core::fmt::Debug for $name<$lanes>
+        impl<const LANES: usize> core::fmt::Debug for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 f.debug_list()
@@ -169,36 +183,36 @@ macro_rules! define_mask {
             }
         }
 
-        impl<const $lanes: usize> core::fmt::Binary for $name<$lanes>
+        impl<const LANES: usize> core::fmt::Binary for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::Binary::fmt(&self.0, f)
             }
         }
 
-        impl<const $lanes: usize> core::fmt::Octal for $name<$lanes>
+        impl<const LANES: usize> core::fmt::Octal for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::Octal::fmt(&self.0, f)
             }
         }
 
-        impl<const $lanes: usize> core::fmt::LowerHex for $name<$lanes>
+        impl<const LANES: usize> core::fmt::LowerHex for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::LowerHex::fmt(&self.0, f)
             }
         }
 
-        impl<const $lanes: usize> core::fmt::UpperHex for $name<$lanes>
+        impl<const LANES: usize> core::fmt::UpperHex for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::UpperHex::fmt(&self.0, f)
@@ -207,7 +221,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -218,7 +232,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -229,7 +243,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -240,7 +254,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOr for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -251,7 +265,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -262,7 +276,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -273,7 +287,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXor for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -284,7 +298,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -295,7 +309,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -306,7 +320,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::Not for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -317,7 +331,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: Self) {
@@ -327,7 +341,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: bool) {
@@ -337,7 +351,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: Self) {
@@ -347,7 +361,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: bool) {
@@ -357,7 +371,7 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: Self) {
@@ -367,13 +381,15 @@ macro_rules! define_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
         where
-            $type: crate::LanesAtMost32,
+            crate::$type<LANES>: crate::LanesAtMost32,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: bool) {
                 *self ^= Self::splat(rhs);
             }
         }
+
+        impl_full_mask_reductions! { $name, $type }
     }
 }
 
diff --git a/crates/core_simd/src/masks/mod.rs b/crates/core_simd/src/masks/mod.rs
index 0b986aaf7e1..4503187e4b8 100644
--- a/crates/core_simd/src/masks/mod.rs
+++ b/crates/core_simd/src/masks/mod.rs
@@ -7,25 +7,27 @@ pub use full_masks::*;
 mod bitmask;
 pub use bitmask::*;
 
-use crate::LanesAtMost32;
+use crate::{LanesAtMost32, SimdI128, SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
 
 macro_rules! define_opaque_mask {
     {
         $(#[$attr:meta])*
-        struct $name:ident<const $lanes:ident: usize>($inner_ty:ty);
-        @bits $bits_ty:ty
+        struct $name:ident<const $lanes:ident: usize>($inner_ty:ident<$lanes2:ident>);
+        @bits $bits_ty:ident
     } => {
         $(#[$attr])*
         #[allow(non_camel_case_types)]
-        pub struct $name<const $lanes: usize>($inner_ty) where $bits_ty: LanesAtMost32;
+        pub struct $name<const LANES: usize>($inner_ty<LANES>) where $bits_ty<LANES>: LanesAtMost32;
 
-        impl<const $lanes: usize> $name<$lanes>
+        impl_opaque_mask_reductions! { $name, $inner_ty, $bits_ty }
+
+        impl<const LANES: usize> $name<LANES>
         where
-            $bits_ty: LanesAtMost32
+            $bits_ty<LANES>: LanesAtMost32
         {
             /// Construct a mask by setting all lanes to the given value.
             pub fn splat(value: bool) -> Self {
-                Self(<$inner_ty>::splat(value))
+                Self(<$inner_ty<LANES>>::splat(value))
             }
 
             /// Converts an array to a SIMD vector.
@@ -69,66 +71,72 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> From<BitMask<$lanes>> for $name<$lanes>
+        impl<const LANES: usize> From<BitMask<LANES>> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
-            BitMask<$lanes>: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
+            BitMask<LANES>: LanesAtMost32,
         {
-            fn from(value: BitMask<$lanes>) -> Self {
+            fn from(value: BitMask<LANES>) -> Self {
                 Self(value.into())
             }
         }
 
-        impl<const $lanes: usize> From<$name<$lanes>> for crate::BitMask<$lanes>
+        impl<const LANES: usize> From<$name<LANES>> for crate::BitMask<LANES>
         where
-            $bits_ty: LanesAtMost32,
-            BitMask<$lanes>: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
+            BitMask<LANES>: LanesAtMost32,
         {
-            fn from(value: $name<$lanes>) -> Self {
+            fn from(value: $name<LANES>) -> Self {
                 value.0.into()
             }
         }
 
-        impl<const $lanes: usize> From<$inner_ty> for $name<$lanes>
+        impl<const LANES: usize> From<$inner_ty<LANES>> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
-            fn from(value: $inner_ty) -> Self {
+            fn from(value: $inner_ty<LANES>) -> Self {
                 Self(value)
             }
         }
 
-        impl<const $lanes: usize> From<$name<$lanes>> for $inner_ty
+        impl<const LANES: usize> From<$name<LANES>> for $inner_ty<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
-            fn from(value: $name<$lanes>) -> Self {
+            fn from(value: $name<LANES>) -> Self {
                 value.0
             }
         }
 
         // vector/array conversion
-        impl<const $lanes: usize> From<[bool; $lanes]> for $name<$lanes> where $bits_ty: crate::LanesAtMost32 {
-            fn from(array: [bool; $lanes]) -> Self {
+        impl<const LANES: usize> From<[bool; LANES]> for $name<LANES>
+        where
+            $bits_ty<LANES>: crate::LanesAtMost32
+        {
+            fn from(array: [bool; LANES]) -> Self {
                 Self::from_array(array)
             }
         }
 
-        impl <const $lanes: usize> From<$name<$lanes>> for [bool; $lanes] where $bits_ty: crate::LanesAtMost32 {
-            fn from(vector: $name<$lanes>) -> Self {
+        impl <const LANES: usize> From<$name<LANES>> for [bool; LANES]
+        where
+            $bits_ty<LANES>: crate::LanesAtMost32
+        {
+            fn from(vector: $name<LANES>) -> Self {
                 vector.to_array()
             }
         }
 
-        impl<const $lanes: usize> Copy for $name<$lanes>
+        impl<const LANES: usize> Copy for $name<LANES>
         where
-            $inner_ty: Copy,
-            $bits_ty: LanesAtMost32,
+            $inner_ty<LANES>: Copy,
+            $bits_ty<LANES>: LanesAtMost32,
         {}
 
-        impl<const $lanes: usize> Clone for $name<$lanes>
+        impl<const LANES: usize> Clone for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn clone(&self) -> Self {
@@ -136,9 +144,9 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> Default for $name<$lanes>
+        impl<const LANES: usize> Default for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn default() -> Self {
@@ -146,9 +154,9 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> PartialEq for $name<$lanes>
+        impl<const LANES: usize> PartialEq for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn eq(&self, other: &Self) -> bool {
@@ -156,9 +164,9 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> PartialOrd for $name<$lanes>
+        impl<const LANES: usize> PartialOrd for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
@@ -166,9 +174,9 @@ macro_rules! define_opaque_mask {
             }
         }
 
-        impl<const $lanes: usize> core::fmt::Debug for $name<$lanes>
+        impl<const LANES: usize> core::fmt::Debug for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                 core::fmt::Debug::fmt(&self.0, f)
@@ -177,7 +185,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -188,7 +196,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -199,7 +207,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -210,7 +218,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOr for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -221,7 +229,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -232,7 +240,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -243,7 +251,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXor for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -254,7 +262,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = Self;
             #[inline]
@@ -265,7 +273,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -276,7 +284,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::Not for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             type Output = $name<LANES>;
             #[inline]
@@ -287,7 +295,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: Self) {
@@ -297,7 +305,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitand_assign(&mut self, rhs: bool) {
@@ -307,7 +315,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: Self) {
@@ -317,7 +325,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitor_assign(&mut self, rhs: bool) {
@@ -327,7 +335,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: Self) {
@@ -337,7 +345,7 @@ macro_rules! define_opaque_mask {
 
         impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
         where
-            $bits_ty: LanesAtMost32,
+            $bits_ty<LANES>: LanesAtMost32,
         {
             #[inline]
             fn bitxor_assign(&mut self, rhs: bool) {
@@ -352,7 +360,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask8<const LANES: usize>(SimdMask8<LANES>);
-    @bits crate::SimdI8<LANES>
+    @bits SimdI8
 }
 
 define_opaque_mask! {
@@ -360,7 +368,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask16<const LANES: usize>(SimdMask16<LANES>);
-    @bits crate::SimdI16<LANES>
+    @bits SimdI16
 }
 
 define_opaque_mask! {
@@ -368,7 +376,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask32<const LANES: usize>(SimdMask32<LANES>);
-    @bits crate::SimdI32<LANES>
+    @bits SimdI32
 }
 
 define_opaque_mask! {
@@ -376,7 +384,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask64<const LANES: usize>(SimdMask64<LANES>);
-    @bits crate::SimdI64<LANES>
+    @bits SimdI64
 }
 
 define_opaque_mask! {
@@ -384,7 +392,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct Mask128<const LANES: usize>(SimdMask128<LANES>);
-    @bits crate::SimdI128<LANES>
+    @bits SimdI128
 }
 
 define_opaque_mask! {
@@ -392,7 +400,7 @@ define_opaque_mask! {
     ///
     /// The layout of this type is unspecified.
     struct MaskSize<const LANES: usize>(SimdMaskSize<LANES>);
-    @bits crate::SimdIsize<LANES>
+    @bits SimdIsize
 }
 
 /// Vector of eight 8-bit masks
diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
new file mode 100644
index 00000000000..382d366dd3d
--- /dev/null
+++ b/crates/core_simd/src/reduction.rs
@@ -0,0 +1,162 @@
+macro_rules! impl_integer_reductions {
+    { $name:ident, $scalar:ty } => {
+        impl<const LANES: usize> crate::$name<LANES>
+        where
+            Self: crate::LanesAtMost32
+        {
+            /// Horizontal wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
+            #[inline]
+            pub fn horizontal_sum(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0) }
+            }
+
+            /// Horizontal wrapping multiply.  Returns the product of the lanes of the vector, with wrapping multiplication.
+            #[inline]
+            pub fn horizontal_product(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1) }
+            }
+
+            /// Horizontal bitwise "and".  Returns the cumulative bitwise "and" across the lanes of
+            /// the vector.
+            #[inline]
+            pub fn horizontal_and(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_and(self) }
+            }
+
+            /// Horizontal bitwise "or".  Returns the cumulative bitwise "or" across the lanes of
+            /// the vector.
+            #[inline]
+            pub fn horizontal_or(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_or(self) }
+            }
+
+            /// Horizontal bitwise "xor".  Returns the cumulative bitwise "xor" across the lanes of
+            /// the vector.
+            #[inline]
+            pub fn horizontal_xor(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_xor(self) }
+            }
+
+            /// Horizontal maximum.  Returns the maximum lane in the vector.
+            #[inline]
+            pub fn horizontal_max(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_max(self) }
+            }
+
+            /// Horizontal minimum.  Returns the minimum lane in the vector.
+            #[inline]
+            pub fn horizontal_min(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_min(self) }
+            }
+        }
+    }
+}
+
+macro_rules! impl_float_reductions {
+    { $name:ident, $scalar:ty } => {
+        impl<const LANES: usize> crate::$name<LANES>
+        where
+            Self: crate::LanesAtMost32
+        {
+
+            /// Horizontal add.  Returns the sum of the lanes of the vector.
+            #[inline]
+            pub fn horizontal_sum(self) -> $scalar {
+                // LLVM sum is inaccurate on i586
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+                    self.as_slice().iter().sum()
+                } else {
+                    unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
+                }
+            }
+
+            /// Horizontal multiply.  Returns the product of the lanes of the vector.
+            #[inline]
+            pub fn horizontal_product(self) -> $scalar {
+                // LLVM product is inaccurate on i586
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+                    self.as_slice().iter().product()
+                } else {
+                    unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
+                }
+            }
+
+            /// Horizontal maximum.  Returns the maximum lane in the vector.
+            ///
+            /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
+            #[inline]
+            pub fn horizontal_max(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_max(self) }
+            }
+
+            /// Horizontal minimum.  Returns the minimum lane in the vector.
+            ///
+            /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
+            #[inline]
+            pub fn horizontal_min(self) -> $scalar {
+                unsafe { crate::intrinsics::simd_reduce_min(self) }
+            }
+        }
+    }
+}
+
+macro_rules! impl_full_mask_reductions {
+    { $name:ident, $inner:ident } => {
+        impl<const LANES: usize> crate::$name<LANES>
+        where
+            crate::$inner<LANES>: crate::LanesAtMost32
+        {
+            /// Returns true if any lane is set, or false otherwise.
+            #[inline]
+            pub fn any(self) -> bool {
+                unsafe { crate::intrinsics::simd_reduce_any(self.to_int()) }
+            }
+
+            /// Returns true if all lanes are set, or false otherwise.
+            #[inline]
+            pub fn all(self) -> bool {
+                unsafe { crate::intrinsics::simd_reduce_all(self.to_int()) }
+            }
+        }
+    }
+}
+
+macro_rules! impl_opaque_mask_reductions {
+    { $name:ident, $inner:ident, $bits_ty:ident } => {
+        impl<const LANES: usize> $name<LANES>
+        where
+            $bits_ty<LANES>: crate::LanesAtMost32
+        {
+            /// Returns true if any lane is set, or false otherwise.
+            #[inline]
+            pub fn any(self) -> bool {
+                self.0.any()
+            }
+
+            /// Returns true if all lanes are set, or false otherwise.
+            #[inline]
+            pub fn all(self) -> bool {
+                self.0.all()
+            }
+        }
+    }
+}
+
+impl<const LANES: usize> crate::BitMask<LANES>
+where
+    crate::BitMask<LANES>: crate::LanesAtMost32,
+{
+    /// Returns true if any lane is set, or false otherwise.
+    #[inline]
+    pub fn any(self) -> bool {
+        self != Self::splat(false)
+    }
+
+    /// Returns true if all lanes are set, or false otherwise.
+    #[inline]
+    pub fn all(self) -> bool {
+        self == Self::splat(true)
+    }
+}
diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs
index 46e4229ddb5..47013053ae1 100644
--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@@ -6,6 +6,7 @@
 macro_rules! impl_float_vector {
     { $name:ident, $type:ty, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => {
         impl_vector! { $name, $type }
+        impl_float_reductions! { $name, $type }
 
         impl<const LANES: usize> $name<LANES>
         where
diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs
index 5304d11cd6e..24f77cb3e10 100644
--- a/crates/core_simd/src/vector/int.rs
+++ b/crates/core_simd/src/vector/int.rs
@@ -4,6 +4,7 @@
 macro_rules! impl_integer_vector {
     { $name:ident, $type:ty, $mask_ty:ident, $mask_impl_ty:ident } => {
         impl_vector! { $name, $type }
+        impl_integer_reductions! { $name, $type }
 
         impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
 
diff --git a/crates/core_simd/src/vector/uint.rs b/crates/core_simd/src/vector/uint.rs
index 71b5b295112..3866b9ca5c6 100644
--- a/crates/core_simd/src/vector/uint.rs
+++ b/crates/core_simd/src/vector/uint.rs
@@ -5,6 +5,7 @@
 macro_rules! impl_unsigned_vector {
     { $name:ident, $type:ty } => {
         impl_vector! { $name, $type }
+        impl_integer_reductions! { $name, $type }
 
         impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
 
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 03a835b9c66..59da77de622 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -59,6 +59,24 @@ macro_rules! test_mask_api {
                 let mask = core_simd::$name::<8>::splat(false);
                 let _ = mask.test(8);
             }
+
+            #[test]
+            fn any() {
+                assert!(!core_simd::$name::<8>::splat(false).any());
+                assert!(core_simd::$name::<8>::splat(true).any());
+                let mut v = core_simd::$name::<8>::splat(false);
+                v.set(2, true);
+                assert!(v.any());
+            }
+
+            #[test]
+            fn all() {
+                assert!(!core_simd::$name::<8>::splat(false).all());
+                assert!(core_simd::$name::<8>::splat(true).all());
+                let mut v = core_simd::$name::<8>::splat(false);
+                v.set(2, true);
+                assert!(!v.all());
+            }
         }
     }
 }
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index a70a8a9c48b..37f3b49a330 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -136,6 +136,83 @@ macro_rules! impl_binary_checked_op_test {
     };
 }
 
+#[macro_export]
+macro_rules! impl_common_integer_tests {
+    { $vector:ident, $scalar:ident } => {
+        test_helpers::test_lanes! {
+            fn horizontal_sum<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).horizontal_sum(),
+                        x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn horizontal_product<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).horizontal_product(),
+                        x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn horizontal_and<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).horizontal_and(),
+                        x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn horizontal_or<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).horizontal_or(),
+                        x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn horizontal_xor<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).horizontal_xor(),
+                        x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn horizontal_max<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).horizontal_max(),
+                        x.iter().copied().max().unwrap(),
+                    );
+                    Ok(())
+                });
+            }
+
+            fn horizontal_min<const LANES: usize>() {
+                test_helpers::test_1(&|x| {
+                    test_helpers::prop_assert_biteq! (
+                        $vector::<LANES>::from_array(x).horizontal_min(),
+                        x.iter().copied().min().unwrap(),
+                    );
+                    Ok(())
+                });
+            }
+        }
+    }
+}
+
 /// Implement tests for signed integers.
 #[macro_export]
 macro_rules! impl_signed_tests {
@@ -144,6 +221,8 @@ macro_rules! impl_signed_tests {
             type Vector<const LANES: usize> = core_simd::$vector<LANES>;
             type Scalar = $scalar;
 
+            impl_common_integer_tests! { Vector, Scalar }
+
             test_helpers::test_lanes! {
                 fn neg<const LANES: usize>() {
                     test_helpers::test_unary_elementwise(
@@ -241,6 +320,8 @@ macro_rules! impl_unsigned_tests {
             type Vector<const LANES: usize> = core_simd::$vector<LANES>;
             type Scalar = $scalar;
 
+            impl_common_integer_tests! { Vector, Scalar }
+
             test_helpers::test_lanes_panic! {
                 fn rem_zero_panic<const LANES: usize>() {
                     let a = Vector::<LANES>::splat(42);
@@ -397,6 +478,50 @@ macro_rules! impl_float_tests {
                         },
                     ).unwrap();
                 }
+
+                fn horizontal_sum<const LANES: usize>() {
+                    test_helpers::test_1(&|x| {
+                        test_helpers::prop_assert_biteq! (
+                            Vector::<LANES>::from_array(x).horizontal_sum(),
+                            x.iter().sum(),
+                        );
+                        Ok(())
+                    });
+                }
+
+                fn horizontal_product<const LANES: usize>() {
+                    test_helpers::test_1(&|x| {
+                        test_helpers::prop_assert_biteq! (
+                            Vector::<LANES>::from_array(x).horizontal_product(),
+                            x.iter().product(),
+                        );
+                        Ok(())
+                    });
+                }
+
+                fn horizontal_max<const LANES: usize>() {
+                    test_helpers::test_1(&|x| {
+                        let vmax = Vector::<LANES>::from_array(x).horizontal_max();
+                        let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max);
+                        // 0 and -0 are treated the same
+                        if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
+                            test_helpers::prop_assert_biteq!(vmax, smax);
+                        }
+                        Ok(())
+                    });
+                }
+
+                fn horizontal_min<const LANES: usize>() {
+                    test_helpers::test_1(&|x| {
+                        let vmax = Vector::<LANES>::from_array(x).horizontal_min();
+                        let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min);
+                        // 0 and -0 are treated the same
+                        if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
+                            test_helpers::prop_assert_biteq!(vmax, smax);
+                        }
+                        Ok(())
+                    });
+                }
             }
         }
     }
diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs
index 4a41fe3a16e..00350e22418 100644
--- a/crates/test_helpers/src/biteq.rs
+++ b/crates/test_helpers/src/biteq.rs
@@ -95,7 +95,7 @@ impl<T: BitEq> core::fmt::Debug for BitEqWrapper<'_, T> {
 
 #[macro_export]
 macro_rules! prop_assert_biteq {
-    { $a:expr, $b:expr } => {
+    { $a:expr, $b:expr $(,)? } => {
         {
             use $crate::biteq::BitEqWrapper;
             let a = $a;