21 files changed, 438 insertions, 123 deletions
diff --git a/library/portable-simd/crates/core_simd/examples/spectral_norm.rs b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
new file mode 100644
index 00000000000..c515dad4dea
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
@@ -0,0 +1,77 @@
+#![feature(portable_simd)]
+
+use core_simd::simd::*;
+
+fn a(i: usize, j: usize) -> f64 {
+    ((i + j) * (i + j + 1) / 2 + i + 1) as f64
+}
+
+fn mult_av(v: &[f64], out: &mut [f64]) {
+    assert!(v.len() == out.len());
+    assert!(v.len() % 2 == 0);
+
+    for (i, out) in out.iter_mut().enumerate() {
+        let mut sum = f64x2::splat(0.0);
+
+        let mut j = 0;
+        while j < v.len() {
+            let b = f64x2::from_slice(&v[j..]);
+            let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
+            sum += b / a;
+            j += 2
+        }
+        *out = sum.horizontal_sum();
+    }
+}
+
+fn mult_atv(v: &[f64], out: &mut [f64]) {
+    assert!(v.len() == out.len());
+    assert!(v.len() % 2 == 0);
+
+    for (i, out) in out.iter_mut().enumerate() {
+        let mut sum = f64x2::splat(0.0);
+
+        let mut j = 0;
+        while j < v.len() {
+            let b = f64x2::from_slice(&v[j..]);
+            let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
+            sum += b / a;
+            j += 2
+        }
+        *out = sum.horizontal_sum();
+    }
+}
+
+fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
+    mult_av(v, tmp);
+    mult_atv(tmp, out);
+}
+
+pub fn spectral_norm(n: usize) -> f64 {
+    assert!(n % 2 == 0, "only even lengths are accepted");
+
+    let mut u = vec![1.0; n];
+    let mut v = u.clone();
+    let mut tmp = u.clone();
+
+    for _ in 0..10 {
+        mult_atav(&u, &mut v, &mut tmp);
+        mult_atav(&v, &mut u, &mut tmp);
+    }
+    (dot(&u, &v) / dot(&v, &v)).sqrt()
+}
+
+fn dot(x: &[f64], y: &[f64]) -> f64 {
+    // This is auto-vectorized:
+    x.iter().zip(y).map(|(&x, &y)| x * y).sum()
+}
+
+#[cfg(test)]
+#[test]
+fn test() {
+    assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
+}
+
+fn main() {
+    // Empty main to make cargo happy
+}
diff --git a/library/portable-simd/crates/core_simd/src/comparisons.rs b/library/portable-simd/crates/core_simd/src/comparisons.rs
index edef5af3687..d024cf4ddbe 100644
--- a/library/portable-simd/crates/core_simd/src/comparisons.rs
+++ b/library/portable-simd/crates/core_simd/src/comparisons.rs
@@ -10,6 +10,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
     }
 
@@ -17,6 +19,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
     }
 }
@@ -30,6 +34,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
     }
 
@@ -37,6 +43,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
     }
 
@@ -44,6 +52,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
     }
 
@@ -51,6 +61,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
     }
 }
diff --git a/library/portable-simd/crates/core_simd/src/intrinsics.rs b/library/portable-simd/crates/core_simd/src/intrinsics.rs
index 2291400537c..4c68d11e893 100644
--- a/library/portable-simd/crates/core_simd/src/intrinsics.rs
+++ b/library/portable-simd/crates/core_simd/src/intrinsics.rs
@@ -2,31 +2,55 @@
 //! crate.
 //!
 //! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
+//!
+//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
+//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
+//!   poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
+//!   poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
+//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
+//!   this means that division by poison or undef is like division by zero, which means it inflicts...
+//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
+//!   LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
+//!   The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
+//!   and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
+//!
+//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
+//!
+//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
 
 /// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
-/// simply lowered to the matching LLVM instructions by the compiler.  The associated instruction
-/// is documented alongside each intrinsic.
+/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
+/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
 extern "platform-intrinsic" {
     /// add/fadd
     pub(crate) fn simd_add<T>(x: T, y: T) -> T;
 
     /// sub/fsub
-    pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
 
     /// mul/fmul
     pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
 
     /// udiv/sdiv/fdiv
-    pub(crate) fn simd_div<T>(x: T, y: T) -> T;
+    /// ints and uints: {s,u}div incur UB if division by zero occurs.
+    /// ints: sdiv is UB for int::MIN / -1.
+    /// floats: fdiv is never UB, but may create NaNs or infinities.
+    pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
 
     /// urem/srem/frem
-    pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
+    /// ints and uints: {s,u}rem incur UB if division by zero occurs.
+    /// ints: srem is UB for int::MIN / -1.
+    /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
+    pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
 
     /// shl
-    pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
+    /// for (u)ints. poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
 
-    /// lshr/ashr
-    pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
+    /// ints: ashr
+    /// uints: lshr
+    /// poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
 
     /// and
     pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@@ -38,12 +62,18 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
 
     /// fptoui/fptosi/uitofp/sitofp
+    /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
+    /// but the truncated value must fit in the target type or the result is poison.
+    /// use `simd_as` instead for a cast that performs a saturating conversion.
     pub(crate) fn simd_cast<T, U>(x: T) -> U;
     /// follows Rust's `T as U` semantics, including saturating float casts
     /// which amounts to the same as `simd_cast` for many cases
     pub(crate) fn simd_as<T, U>(x: T) -> U;
 
     /// neg/fneg
+    /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
+    /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
+    /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
     pub(crate) fn simd_neg<T>(x: T) -> T;
 
     /// fabs
@@ -53,6 +83,7 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
     pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
 
+    // these return Simd<int, N> with the same BITS size as the inputs
     pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -61,19 +92,31 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
 
     // shufflevector
+    // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
     pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
 
+    /// llvm.masked.gather
+    /// like a loop of pointer reads
+    /// val: vector of values to select if a lane is masked
+    /// ptr: vector of pointers to read from
+    /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
+    /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
+    /// FIXME: review this if/when we fix up our mask story in general?
     pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
+    /// llvm.masked.scatter
+    /// like gather, but more spicy, as it writes instead of reads
     pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
 
     // {s,u}add.sat
     pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
 
     // {s,u}sub.sat
-    pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
 
     // reductions
+    // llvm.vector.reduce.{add,fadd}
     pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+    // llvm.vector.reduce.{mul,fmul}
     pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
     #[allow(unused)]
     pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@@ -90,7 +133,10 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
 
     // select
-    pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
+    // first argument is a vector of integers, -1 (all bits 1) is "true"
+    // logically equivalent to (yes & m) | (no & (m^-1),
+    // but you can use it on floats.
+    pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
     #[allow(unused)]
-    pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
+    pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
 }
diff --git a/library/portable-simd/crates/core_simd/src/lib.rs b/library/portable-simd/crates/core_simd/src/lib.rs
index 960a6640083..91ae34c05e0 100644
--- a/library/portable-simd/crates/core_simd/src/lib.rs
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
@@ -1,7 +1,9 @@
 #![cfg_attr(not(feature = "std"), no_std)]
 #![feature(
     const_fn_trait_bound,
+    convert_float_to_int,
     decl_macro,
+    intra_doc_pointers,
     platform_intrinsics,
     repr_simd,
     simd_ffi,
diff --git a/library/portable-simd/crates/core_simd/src/masks.rs b/library/portable-simd/crates/core_simd/src/masks.rs
index ae1fef53da8..e1cd7930450 100644
--- a/library/portable-simd/crates/core_simd/src/masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@@ -12,8 +12,10 @@
 )]
 mod mask_impl;
 
-use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+mod to_bitmask;
+pub use to_bitmask::ToBitMask;
+
+use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
 use core::cmp::Ordering;
 use core::{fmt, mem};
 
@@ -42,6 +44,9 @@ mod sealed {
 use sealed::Sealed;
 
 /// Marker trait for types that may be used as SIMD mask elements.
+///
+/// # Safety
+/// Type must be a signed integer.
 pub unsafe trait MaskElement: SimdElement + Sealed {}
 
 macro_rules! impl_element {
@@ -149,6 +154,7 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+        // Safety: the caller must confirm this invariant
         unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
     }
 
@@ -161,6 +167,7 @@ where
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn from_int(value: Simd<T, LANES>) -> Self {
         assert!(T::valid(value), "all values must be either 0 or -1",);
+        // Safety: the validity has been checked
         unsafe { Self::from_int_unchecked(value) }
     }
 
@@ -179,6 +186,7 @@ where
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+        // Safety: the caller must confirm this invariant
         unsafe { self.0.test_unchecked(lane) }
     }
 
@@ -190,6 +198,7 @@ where
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub fn test(&self, lane: usize) -> bool {
         assert!(lane < LANES, "lane index out of range");
+        // Safety: the lane index has been checked
         unsafe { self.test_unchecked(lane) }
     }
 
@@ -199,6 +208,7 @@ where
     /// `lane` must be less than `LANES`.
     #[inline]
     pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+        // Safety: the caller must confirm this invariant
         unsafe {
             self.0.set_unchecked(lane, value);
         }
@@ -211,27 +221,12 @@ where
     #[inline]
     pub fn set(&mut self, lane: usize, value: bool) {
         assert!(lane < LANES, "lane index out of range");
+        // Safety: the lane index has been checked
         unsafe {
             self.set_unchecked(lane, value);
         }
     }
 
-    /// Convert this mask to a bitmask, with one bit set per lane.
-    #[cfg(feature = "generic_const_exprs")]
-    #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        self.0.to_bitmask()
-    }
-
-    /// Convert a bitmask to a mask.
-    #[cfg(feature = "generic_const_exprs")]
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        Self(mask_impl::Mask::from_bitmask(bitmask))
-    }
-
     /// Returns true if any lane is set, or false otherwise.
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
diff --git a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
index b4217dc87ba..ec4dd357ee9 100644
--- a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
@@ -1,7 +1,7 @@
 #![allow(unused_imports)]
 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
 use core::marker::PhantomData;
 
 /// A mask where each lane is represented by a single bit.
@@ -115,20 +115,22 @@ where
         unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
     }
 
-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        // Safety: these are the same type and we are laundering the generic
+    pub fn to_bitmask_integer<U>(self) -> U
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: these are the same types
         unsafe { core::mem::transmute_copy(&self.0) }
     }
 
-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        // Safety: these are the same type and we are laundering the generic
-        Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+    pub fn from_bitmask_integer<U>(bitmask: U) -> Self
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: these are the same types
+        unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
     }
 
     #[inline]
@@ -137,6 +139,7 @@ where
     where
         U: MaskElement,
     {
+        // Safety: bitmask layout does not depend on the element width
         unsafe { core::mem::transmute_copy(&self) }
     }
 
diff --git a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
index e5bb784bb91..8bbdf637de8 100644
--- a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
@@ -2,7 +2,7 @@
 
 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
 
 #[repr(transparent)]
 pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
@@ -66,6 +66,23 @@ where
     }
 }
 
+// Used for bitmask bit order workaround
+pub(crate) trait ReverseBits {
+    fn reverse_bits(self) -> Self;
+}
+
+macro_rules! impl_reverse_bits {
+    { $($int:ty),* } => {
+        $(
+        impl ReverseBits for $int {
+            fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) }
+        }
+        )*
+    }
+}
+
+impl_reverse_bits! { u8, u16, u32, u64 }
+
 impl<T, const LANES: usize> Mask<T, LANES>
 where
     T: MaskElement,
@@ -106,44 +123,40 @@ where
     where
         U: MaskElement,
     {
+        // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
         unsafe { Mask(intrinsics::simd_cast(self.0)) }
     }
 
-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        unsafe {
-            let mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN] =
-                intrinsics::simd_bitmask(self.0);
-
-            // There is a bug where LLVM appears to implement this operation with the wrong
-            // bit order.
-            // TODO fix this in a better way
-            if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
-                    *x = x.reverse_bits();
-                }
-            }
+    pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: U is required to be the appropriate bitmask type
+        let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
 
+        // LLVM assumes bit order should match endianness
+        if cfg!(target_endian = "big") {
+            bitmask.reverse_bits()
+        } else {
             bitmask
         }
     }
 
-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        unsafe {
-            // There is a bug where LLVM appears to implement this operation with the wrong
-            // bit order.
-            // TODO fix this in a better way
-            if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
-                    *x = x.reverse_bits();
-                }
-            }
+    pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // LLVM assumes bit order should match endianness
+        let bitmask = if cfg!(target_endian = "big") {
+            bitmask.reverse_bits()
+        } else {
+            bitmask
+        };
 
+        // Safety: U is required to be the appropriate bitmask type
+        unsafe {
             Self::from_int_unchecked(intrinsics::simd_select_bitmask(
                 bitmask,
                 Self::splat(true).to_int(),
@@ -155,12 +168,14 @@ where
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub fn any(self) -> bool {
+        // Safety: use `self` as an integer vector
         unsafe { intrinsics::simd_reduce_any(self.to_int()) }
     }
 
     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original value"]
     pub fn all(self) -> bool {
+        // Safety: use `self` as an integer vector
         unsafe { intrinsics::simd_reduce_all(self.to_int()) }
     }
 }
@@ -184,6 +199,7 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn bitand(self, rhs: Self) -> Self {
+        // Safety: `self` is an integer vector
         unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
     }
 }
@@ -197,6 +213,7 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn bitor(self, rhs: Self) -> Self {
+        // Safety: `self` is an integer vector
         unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
     }
 }
@@ -210,6 +227,7 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn bitxor(self, rhs: Self) -> Self {
+        // Safety: `self` is an integer vector
         unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
     }
 }
diff --git a/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
new file mode 100644
index 00000000000..1c2037764c1
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
@@ -0,0 +1,57 @@
+use super::{mask_impl, Mask, MaskElement};
+use crate::simd::{LaneCount, SupportedLaneCount};
+
+mod sealed {
+    pub trait Sealed {}
+}
+pub use sealed::Sealed;
+
+impl<T, const LANES: usize> Sealed for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+/// Converts masks to and from integer bitmasks.
+///
+/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
+///
+/// # Safety
+/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in
+/// the mask.
+pub unsafe trait ToBitMask: Sealed {
+    /// The integer bitmask type.
+    type BitMask;
+
+    /// Converts a mask to a bitmask.
+    fn to_bitmask(self) -> Self::BitMask;
+
+    /// Converts a bitmask to a mask.
+    fn from_bitmask(bitmask: Self::BitMask) -> Self;
+}
+
+macro_rules! impl_integer_intrinsic {
+    { $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
+        $(
+        unsafe impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
+            type BitMask = $int;
+
+            fn to_bitmask(self) -> $int {
+                self.0.to_bitmask_integer()
+            }
+
+            fn from_bitmask(bitmask: $int) -> Self {
+                Self(mask_impl::Mask::from_bitmask_integer(bitmask))
+            }
+        }
+        )*
+    }
+}
+
+impl_integer_intrinsic! {
+    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
+    unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
+    unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
+    unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
+}
diff --git a/library/portable-simd/crates/core_simd/src/math.rs b/library/portable-simd/crates/core_simd/src/math.rs
index 7435b6df918..0b4e40983af 100644
--- a/library/portable-simd/crates/core_simd/src/math.rs
+++ b/library/portable-simd/crates/core_simd/src/math.rs
@@ -22,6 +22,7 @@ macro_rules! impl_uint_arith {
             /// ```
             #[inline]
             pub fn saturating_add(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                 unsafe { simd_saturating_add(self, second) }
             }
 
@@ -41,6 +42,7 @@ macro_rules! impl_uint_arith {
             /// assert_eq!(sat, Simd::splat(0));
             #[inline]
             pub fn saturating_sub(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                 unsafe { simd_saturating_sub(self, second) }
             }
         })+
@@ -68,6 +70,7 @@ macro_rules! impl_int_arith {
             /// ```
             #[inline]
             pub fn saturating_add(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                 unsafe { simd_saturating_add(self, second) }
             }
 
@@ -87,6 +90,7 @@ macro_rules! impl_int_arith {
             /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
             #[inline]
             pub fn saturating_sub(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                 unsafe { simd_saturating_sub(self, second) }
             }
 
diff --git a/library/portable-simd/crates/core_simd/src/ops.rs b/library/portable-simd/crates/core_simd/src/ops.rs
index b65038933bf..1b35b3e717a 100644
--- a/library/portable-simd/crates/core_simd/src/ops.rs
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@@ -57,29 +57,40 @@ macro_rules! wrap_bitshift {
     };
 }
 
-// Division by zero is poison, according to LLVM.
-// So is dividing the MIN value of a signed integer by -1,
-// since that would return MAX + 1.
-// FIXME: Rust allows <SInt>::MIN / -1,
-// so we should probably figure out how to make that safe.
+/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
+/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
+/// thus guaranteeing a Rust value returns instead.
+///
+/// |                  | LLVM | Rust
+/// | :--------------: | :--- | :----------
+/// | N {/,%} 0        | UB   | panic!()
+/// | <$int>::MIN / -1 | UB   | <$int>::MIN
+/// | <$int>::MIN % -1 | UB   | 0
+///
 macro_rules! int_divrem_guard {
     (   $lhs:ident,
         $rhs:ident,
         {   const PANIC_ZERO: &'static str = $zero:literal;
-            const PANIC_OVERFLOW: &'static str = $overflow:literal;
             $simd_call:ident
         },
         $int:ident ) => {
         if $rhs.lanes_eq(Simd::splat(0)).any() {
             panic!($zero);
-        } else if <$int>::MIN != 0
-            && ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
-                // type inference can break here, so cut an SInt to size
-                & $rhs.lanes_eq(Simd::splat(-1i64 as _))).any()
-        {
-            panic!($overflow);
         } else {
-            unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+            // Prevent otherwise-UB overflow on the MIN / -1 case.
+            let rhs = if <$int>::MIN != 0 {
+                // This should, at worst, optimize to a few branchless logical ops
+                // Ideally, this entire conditional should evaporate
+                // Fire LLVM and implement those manually if it doesn't get the hint
+                ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
+                // type inference can break here, so cut an SInt to size
+                & $rhs.lanes_eq(Simd::splat(-1i64 as _)))
+                .select(Simd::splat(1), $rhs)
+            } else {
+                // Nice base case to make it easy to const-fold away the other branch.
+                $rhs
+            };
+            unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
         }
     };
 }
@@ -183,7 +194,6 @@ for_base_ops! {
     impl Div::div {
         int_divrem_guard {
             const PANIC_ZERO: &'static str = "attempt to divide by zero";
-            const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
             simd_div
         }
     }
@@ -191,7 +201,6 @@ for_base_ops! {
     impl Rem::rem {
         int_divrem_guard {
             const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
-            const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
             simd_rem
         }
     }
diff --git a/library/portable-simd/crates/core_simd/src/reduction.rs b/library/portable-simd/crates/core_simd/src/reduction.rs
index e79a185816b..e1cd743e442 100644
--- a/library/portable-simd/crates/core_simd/src/reduction.rs
+++ b/library/portable-simd/crates/core_simd/src/reduction.rs
@@ -14,24 +14,28 @@ macro_rules! impl_integer_reductions {
             /// Horizontal wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
             #[inline]
             pub fn horizontal_sum(self) -> $scalar {
+                // Safety: `self` is an integer vector
                 unsafe { simd_reduce_add_ordered(self, 0) }
             }
 
             /// Horizontal wrapping multiply.  Returns the product of the lanes of the vector, with wrapping multiplication.
             #[inline]
             pub fn horizontal_product(self) -> $scalar {
+                // Safety: `self` is an integer vector
                 unsafe { simd_reduce_mul_ordered(self, 1) }
             }
 
             /// Horizontal maximum.  Returns the maximum lane in the vector.
             #[inline]
             pub fn horizontal_max(self) -> $scalar {
+                // Safety: `self` is an integer vector
                 unsafe { simd_reduce_max(self) }
             }
 
             /// Horizontal minimum.  Returns the minimum lane in the vector.
             #[inline]
             pub fn horizontal_min(self) -> $scalar {
+                // Safety: `self` is an integer vector
                 unsafe { simd_reduce_min(self) }
             }
         }
@@ -63,6 +67,7 @@ macro_rules! impl_float_reductions {
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                     self.as_array().iter().sum()
                 } else {
+                    // Safety: `self` is a float vector
                     unsafe { simd_reduce_add_ordered(self, 0.) }
                 }
             }
@@ -74,6 +79,7 @@ macro_rules! impl_float_reductions {
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                     self.as_array().iter().product()
                 } else {
+                    // Safety: `self` is a float vector
                     unsafe { simd_reduce_mul_ordered(self, 1.) }
                 }
             }
@@ -84,6 +90,7 @@ macro_rules! impl_float_reductions {
             /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
             #[inline]
             pub fn horizontal_max(self) -> $scalar {
+                // Safety: `self` is a float vector
                 unsafe { simd_reduce_max(self) }
             }
 
@@ -93,6 +100,7 @@ macro_rules! impl_float_reductions {
             /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
             #[inline]
             pub fn horizontal_min(self) -> $scalar {
+                // Safety: `self` is a float vector
                 unsafe { simd_reduce_min(self) }
             }
         }
diff --git a/library/portable-simd/crates/core_simd/src/round.rs b/library/portable-simd/crates/core_simd/src/round.rs
index 06ccab3ec49..556bc2cc1fe 100644
--- a/library/portable-simd/crates/core_simd/src/round.rs
+++ b/library/portable-simd/crates/core_simd/src/round.rs
@@ -1,9 +1,10 @@
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::convert::FloatToInt;
 
 macro_rules! implement {
     {
-        $type:ty, $int_type:ty
+        $type:ty
     } => {
         impl<const LANES: usize> Simd<$type, LANES>
         where
@@ -18,20 +19,22 @@ macro_rules! implement {
             /// * Not be NaN
             /// * Not be infinite
             /// * Be representable in the return type, after truncating off its fractional part
+            ///
+            /// If these requirements are infeasible or costly, consider using the safe function [cast],
+            /// which saturates on conversion.
+            ///
+            /// [cast]: Simd::cast
             #[inline]
-            pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> {
+            pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
+            where
+                $type: FloatToInt<I>,
+                I: SimdElement,
+            {
                 unsafe { intrinsics::simd_cast(self) }
             }
-
-            /// Creates a floating-point vector from an integer vector.  Rounds values that are
-            /// not exactly representable.
-            #[inline]
-            pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
-                unsafe { intrinsics::simd_cast(value) }
-            }
         }
     }
 }
 
-implement! { f32, i32 }
-implement! { f64, i64 }
+implement! { f32 }
+implement! { f64 }
diff --git a/library/portable-simd/crates/core_simd/src/select.rs b/library/portable-simd/crates/core_simd/src/select.rs
index 8d521057fbd..3acf07260e1 100644
--- a/library/portable-simd/crates/core_simd/src/select.rs
+++ b/library/portable-simd/crates/core_simd/src/select.rs
@@ -11,6 +11,7 @@ where
     /// For each lane in the mask, choose the corresponding lane from `true_values` if
     /// that lane mask is true, and `false_values` if that lane mask is false.
     ///
+    /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
@@ -31,6 +32,8 @@ where
     where
         U: SimdElement<Mask = T>,
     {
+        // Safety: The mask has been cast to a vector of integers,
+        // and the operands to select between are vectors of the same type and length.
         unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
     }
 
@@ -39,6 +42,7 @@ where
     /// For each lane in the mask, choose the corresponding lane from `true_values` if
     /// that lane mask is true, and `false_values` if that lane mask is false.
     ///
+    /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "std")] use core_simd::Mask;
diff --git a/library/portable-simd/crates/core_simd/src/swizzle.rs b/library/portable-simd/crates/core_simd/src/swizzle.rs
index bdc489774a5..08b2add1166 100644
--- a/library/portable-simd/crates/core_simd/src/swizzle.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle.rs
@@ -95,6 +95,7 @@ pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
         LaneCount<INPUT_LANES>: SupportedLaneCount,
         LaneCount<OUTPUT_LANES>: SupportedLaneCount,
     {
+        // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
         unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
     }
 }
@@ -119,6 +120,7 @@ pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
         LaneCount<INPUT_LANES>: SupportedLaneCount,
         LaneCount<OUTPUT_LANES>: SupportedLaneCount,
     {
+        // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
         unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
     }
 }
diff --git a/library/portable-simd/crates/core_simd/src/to_bytes.rs b/library/portable-simd/crates/core_simd/src/to_bytes.rs
index 8d9b3e8ff85..b36b1a347b2 100644
--- a/library/portable-simd/crates/core_simd/src/to_bytes.rs
+++ b/library/portable-simd/crates/core_simd/src/to_bytes.rs
@@ -8,12 +8,14 @@ macro_rules! impl_to_bytes {
             /// Return the memory representation of this integer as a byte array in native byte
             /// order.
             pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
+                // Safety: transmuting between vectors is safe
                 unsafe { core::mem::transmute_copy(&self) }
             }
 
             /// Create a native endian integer value from its memory representation as a byte array
             /// in native endianness.
             pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
+                // Safety: transmuting between vectors is safe
                 unsafe { core::mem::transmute_copy(&bytes) }
             }
         }
diff --git a/library/portable-simd/crates/core_simd/src/vector.rs b/library/portable-simd/crates/core_simd/src/vector.rs
index 35c5b6b84f8..3ccaf54b2a3 100644
--- a/library/portable-simd/crates/core_simd/src/vector.rs
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@@ -12,7 +12,79 @@ pub(crate) mod ptr;
 use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
 
-/// A SIMD vector of `LANES` elements of type `T`.
+/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
+///
+/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
+/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
+/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
+/// the two arrays together and mapping the operator over each lane.
+///
+/// ```rust
+/// # #![feature(array_zip, portable_simd)]
+/// # use core::simd::{Simd};
+/// let a0: [i32; 4] = [-2, 0, 2, 4];
+/// let a1 = [10, 9, 8, 7];
+/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
+/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
+///
+/// // `Simd<T, N>` implements `From<[T; N]>
+/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
+/// // Which means arrays implement `Into<Simd<T, N>>`.
+/// assert_eq!(v0 + v1, zm_add.into());
+/// assert_eq!(v0 * v1, zm_mul.into());
+/// ```
+///
+/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping<T>`].
+/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
+/// This means there is no warning on overflows, even in "debug" builds.
+/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
+/// and even "debug builds" are unlikely to tolerate the loss of performance.
+/// You may want to consider using explicitly checked arithmetic if such is required.
+/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
+///
+/// [`Wrapping<T>`]: core::num::Wrapping
+///
+/// # Layout
+/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
+/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
+/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
+/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
+///
+/// # ABI "Features"
+/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
+/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
+/// The need for this may be corrected in the future.
+///
+/// # Safe SIMD with Unsafe Rust
+///
+/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
+/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
+/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
+/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
+/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
+/// [`read_unaligned`] and [`write_unaligned`]. This is because:
+/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
+/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
+/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
+/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
+/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
+///
+/// By imposing less obligations, unaligned functions are less likely to make the program unsound,
+/// and may be just as fast as stricter alternatives.
+/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
+/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
+/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
+/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
+/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
+/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
+///
+/// [`transmute`]: core::mem::transmute
+/// [raw pointers]: pointer
+/// [`read_unaligned`]: pointer::read_unaligned
+/// [`write_unaligned`]: pointer::write_unaligned
+/// [`read`]: pointer::read
+/// [`write`]: pointer::write
+/// [as_simd]: slice::as_simd
 #[repr(simd)]
 pub struct Simd<T, const LANES: usize>([T; LANES])
 where
@@ -98,6 +170,7 @@ where
     #[must_use]
     #[inline]
     pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
+        // Safety: The input argument is a vector of a known SIMD type.
         unsafe { intrinsics::simd_as(self) }
     }
 
@@ -171,7 +244,7 @@ where
         or: Self,
     ) -> Self {
         let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
-        // SAFETY: We have masked-off out-of-bounds lanes.
+        // Safety: We have masked-off out-of-bounds lanes.
         unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
     }
 
@@ -212,7 +285,7 @@ where
         let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
         // Ferris forgive me, I have done pointer arithmetic here.
         let ptrs = base_ptr.wrapping_add(idxs);
-        // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
+        // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
         unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) }
     }
 
@@ -264,7 +337,7 @@ where
         idxs: Simd<usize, LANES>,
     ) {
         let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
-        // SAFETY: We have masked-off out-of-bounds lanes.
+        // Safety: We have masked-off out-of-bounds lanes.
         unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
     }
 
@@ -303,7 +376,7 @@ where
         enable: Mask<isize, LANES>,
         idxs: Simd<usize, LANES>,
     ) {
-        // SAFETY: This block works with *mut T derived from &mut 'a [T],
+        // Safety: This block works with *mut T derived from &mut 'a [T],
         // which means it is delicate in Rust's borrowing model, circa 2021:
         // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
         // Even though this block is largely safe methods, it must be exactly this way
@@ -483,7 +556,9 @@ mod sealed {
 use sealed::Sealed;
 
 /// Marker trait for types that may be used as SIMD vector elements.
-/// SAFETY: This trait, when implemented, asserts the compiler can monomorphize
+///
+/// # Safety
+/// This trait, when implemented, asserts the compiler can monomorphize
 /// `#[repr(simd)]` structs with the marked type as an element.
 /// Strictly, it is valid to impl if the vector will not be miscompiled.
 /// Practically, it is user-unfriendly to impl it if the vector won't compile,
diff --git a/library/portable-simd/crates/core_simd/src/vector/ptr.rs b/library/portable-simd/crates/core_simd/src/vector/ptr.rs
index c668d9a6eae..417d255c28d 100644
--- a/library/portable-simd/crates/core_simd/src/vector/ptr.rs
+++ b/library/portable-simd/crates/core_simd/src/vector/ptr.rs
@@ -21,6 +21,8 @@ where
     #[inline]
     #[must_use]
     pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+        // Safety: converting pointers to usize and vice-versa is safe
+        // (even if using that pointer is not)
         unsafe {
             let x: Simd<usize, LANES> = mem::transmute_copy(&self);
             mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
@@ -47,6 +49,8 @@ where
     #[inline]
     #[must_use]
     pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+        // Safety: converting pointers to usize and vice-versa is safe
+        // (even if using that pointer is not)
         unsafe {
             let x: Simd<usize, LANES> = mem::transmute_copy(&self);
             mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
diff --git a/library/portable-simd/crates/core_simd/src/vendor.rs b/library/portable-simd/crates/core_simd/src/vendor.rs
index e8ce7176b4f..9fb70218c95 100644
--- a/library/portable-simd/crates/core_simd/src/vendor.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor.rs
@@ -9,6 +9,8 @@ macro_rules! from_transmute {
         impl core::convert::From<$from> for $to {
             #[inline]
             fn from(value: $from) -> $to {
+                // Safety: transmuting between vectors is safe, but the caller of this macro
+                // checks the invariants
                 unsafe { core::mem::transmute(value) }
             }
         }
diff --git a/library/portable-simd/crates/core_simd/tests/masks.rs b/library/portable-simd/crates/core_simd/tests/masks.rs
index 6a8ecd33a73..3aec36ca7b7 100644
--- a/library/portable-simd/crates/core_simd/tests/masks.rs
+++ b/library/portable-simd/crates/core_simd/tests/masks.rs
@@ -68,16 +68,16 @@ macro_rules! test_mask_api {
                 assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
             }
 
-            #[cfg(feature = "generic_const_exprs")]
             #[test]
             fn roundtrip_bitmask_conversion() {
+                use core_simd::ToBitMask;
                 let values = [
                     true, false, false, true, false, false, true, false,
                     true, true, false, false, false, false, false, true,
                 ];
                 let mask = core_simd::Mask::<$type, 16>::from_array(values);
                 let bitmask = mask.to_bitmask();
-                assert_eq!(bitmask, [0b01001001, 0b10000011]);
+                assert_eq!(bitmask, 0b1000001101001001);
                 assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
             }
         }
diff --git a/library/portable-simd/crates/core_simd/tests/ops_macros.rs b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
index 4fb9de198ee..50f7a4ca170 100644
--- a/library/portable-simd/crates/core_simd/tests/ops_macros.rs
+++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
@@ -210,15 +210,21 @@ macro_rules! impl_signed_tests {
                     )
                 }
 
-            }
+                fn div_min_may_overflow<const LANES: usize>() {
+                    let a = Vector::<LANES>::splat(Scalar::MIN);
+                    let b = Vector::<LANES>::splat(-1);
+                    assert_eq!(a / b, a);
+                }
 
-            test_helpers::test_lanes_panic! {
-                fn div_min_overflow_panics<const LANES: usize>() {
+                fn rem_min_may_overflow<const LANES: usize>() {
                     let a = Vector::<LANES>::splat(Scalar::MIN);
                     let b = Vector::<LANES>::splat(-1);
-                    let _ = a / b;
+                    assert_eq!(a % b, Vector::<LANES>::splat(0));
                 }
 
+            }
+
+            test_helpers::test_lanes_panic! {
                 fn div_by_all_zeros_panics<const LANES: usize>() {
                     let a = Vector::<LANES>::splat(42);
                     let b = Vector::<LANES>::splat(0);
@@ -232,12 +238,6 @@ macro_rules! impl_signed_tests {
                     let _ = a / b;
                 }
 
-                fn rem_min_overflow_panic<const LANES: usize>() {
-                    let a = Vector::<LANES>::splat(Scalar::MIN);
-                    let b = Vector::<LANES>::splat(-1);
-                    let _ = a % b;
-                }
-
                 fn rem_zero_panic<const LANES: usize>() {
                     let a = Vector::<LANES>::splat(42);
                     let b = Vector::<LANES>::splat(0);
diff --git a/library/portable-simd/crates/core_simd/tests/round.rs b/library/portable-simd/crates/core_simd/tests/round.rs
index 1a1bc9ebca7..53732329237 100644
--- a/library/portable-simd/crates/core_simd/tests/round.rs
+++ b/library/portable-simd/crates/core_simd/tests/round.rs
@@ -53,14 +53,6 @@ macro_rules! float_rounding_test {
             }
 
             test_helpers::test_lanes! {
-                fn from_int<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
-                        &Vector::<LANES>::round_from_int,
-                        &|x| x as Scalar,
-                        &|_| true,
-                    )
-                }
-
                 fn to_int_unchecked<const LANES: usize>() {
                     // The maximum integer that can be represented by the equivalently sized float has
                     // all of the mantissa digits set to 1, pushed up to the MSB.
@@ -72,11 +64,11 @@ macro_rules! float_rounding_test {
                     runner.run(
                         &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
                         |x| {
-                            let result_1 = unsafe { Vector::from_array(x).to_int_unchecked().to_array() };
+                            let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
                             let result_2 = {
-                                let mut result = [0; LANES];
+                                let mut result: [IntScalar; LANES] = [0; LANES];
                                 for (i, o) in x.iter().zip(result.iter_mut()) {
-                                    *o = unsafe { i.to_int_unchecked() };
+                                    *o = unsafe { i.to_int_unchecked::<IntScalar>() };
                                 }
                                 result
                             };