about summary refs log tree commit diff
path: root/library/portable-simd
diff options
context:
space:
mode:
authorCaleb Zulawski <caleb.zulawski@gmail.com>2025-01-18 15:37:14 -0500
committerCaleb Zulawski <caleb.zulawski@gmail.com>2025-01-18 15:37:14 -0500
commit1ff6c555bbf95d62bbace49b1cb3b48d9d89cab1 (patch)
treee1082a6922210e75934c1ddaa9b7b1d453c6eb5a /library/portable-simd
parent8321f00bf4a5f25cad5ec8861c5469afe6e0ce8b (diff)
parent3383cfbd3572465febc7a8f816a46304373de46a (diff)
downloadrust-1ff6c555bbf95d62bbace49b1cb3b48d9d89cab1.tar.gz
rust-1ff6c555bbf95d62bbace49b1cb3b48d9d89cab1.zip
Merge commit '3383cfbd3572465febc7a8f816a46304373de46a' into sync-from-portable-simd-2025-01-18
Diffstat (limited to 'library/portable-simd')
-rw-r--r--library/portable-simd/.github/workflows/ci.yml125
-rw-r--r--library/portable-simd/.github/workflows/doc.yml2
-rw-r--r--library/portable-simd/.gitignore1
-rw-r--r--library/portable-simd/Cargo.toml6
-rw-r--r--library/portable-simd/Cross.toml2
-rw-r--r--library/portable-simd/crates/core_simd/Cargo.toml3
-rw-r--r--library/portable-simd/crates/core_simd/src/lane_count.rs8
-rw-r--r--library/portable-simd/crates/core_simd/src/lib.rs4
-rw-r--r--library/portable-simd/crates/core_simd/src/masks.rs42
-rw-r--r--library/portable-simd/crates/core_simd/src/masks/bitmask.rs17
-rw-r--r--library/portable-simd/crates/core_simd/src/masks/full_masks.rs58
-rw-r--r--library/portable-simd/crates/core_simd/src/ops.rs25
-rw-r--r--library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs2
-rw-r--r--library/portable-simd/crates/core_simd/src/simd/num/float.rs30
-rw-r--r--library/portable-simd/crates/core_simd/src/simd/num/int.rs43
-rw-r--r--library/portable-simd/crates/core_simd/src/simd/num/uint.rs42
-rw-r--r--library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs21
-rw-r--r--library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs21
-rw-r--r--library/portable-simd/crates/core_simd/src/swizzle.rs257
-rw-r--r--library/portable-simd/crates/core_simd/src/swizzle_dyn.rs59
-rw-r--r--library/portable-simd/crates/core_simd/src/vector.rs43
-rw-r--r--library/portable-simd/crates/core_simd/src/vendor.rs3
-rw-r--r--library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs31
-rw-r--r--library/portable-simd/crates/core_simd/tests/layout.rs35
-rw-r--r--library/portable-simd/crates/core_simd/tests/masks.rs43
-rw-r--r--library/portable-simd/crates/core_simd/tests/ops_macros.rs38
-rw-r--r--library/portable-simd/crates/core_simd/tests/swizzle.rs18
-rw-r--r--library/portable-simd/crates/test_helpers/Cargo.toml3
-rw-r--r--library/portable-simd/crates/test_helpers/src/lib.rs216
-rw-r--r--library/portable-simd/rust-toolchain.toml3
-rwxr-xr-xlibrary/portable-simd/subtree-sync.sh52
31 files changed, 865 insertions, 388 deletions
diff --git a/library/portable-simd/.github/workflows/ci.yml b/library/portable-simd/.github/workflows/ci.yml
index b292be2d6f9..3984d8f0d8d 100644
--- a/library/portable-simd/.github/workflows/ci.yml
+++ b/library/portable-simd/.github/workflows/ci.yml
@@ -9,6 +9,7 @@ on:
 env:
   CARGO_NET_RETRY: 10
   RUSTUP_MAX_RETRIES: 10
+  PROPTEST_CASES: 64
 
 jobs:
   rustfmt:
@@ -16,12 +17,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v2
-      - name: Setup Rust
-        run: |
-          rustup update nightly --no-self-update
-          rustup default nightly
-          rustup component add rustfmt
+      - uses: actions/checkout@v4
       - name: Run rustfmt
         run: cargo fmt --all -- --check
 
@@ -37,7 +33,9 @@ jobs:
           - i686-unknown-linux-gnu
           - i586-unknown-linux-gnu
           - aarch64-unknown-linux-gnu
+          - arm64ec-pc-windows-msvc
           - armv7-unknown-linux-gnueabihf
+          - loongarch64-unknown-linux-gnu
           # non-nightly since https://github.com/rust-lang/rust/pull/113274
           # - mips-unknown-linux-gnu
           # - mips64-unknown-linux-gnuabi64
@@ -49,13 +47,9 @@ jobs:
           - wasm32-unknown-unknown
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Setup Rust
-        run: |
-          rustup update nightly --no-self-update
-          rustup default nightly
-          rustup target add ${{ matrix.target }}
-          rustup component add clippy
+        run: rustup target add ${{ matrix.target }}
       - name: Run Clippy
         run: cargo clippy --all-targets --target ${{ matrix.target }}
 
@@ -65,26 +59,19 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu, x86_64-apple-darwin]
+        target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu]
         # `default` means we use the default target config for the target,
         # `native` means we run with `-Ctarget-cpu=native`, and anything else is
         # an arg to `-Ctarget-feature`
         target_feature: [default, native, +sse3, +ssse3, +sse4.1, +sse4.2, +avx, +avx2]
 
         exclude:
-          # The macos runners seem to only reliably support up to `avx`.
-          - { target: x86_64-apple-darwin, target_feature: +avx2 }
-          # These features are statically known to be present for all 64 bit
-          # macs, and thus are covered by the `default` test
-          - { target: x86_64-apple-darwin, target_feature: +sse3 }
-          - { target: x86_64-apple-darwin, target_feature: +ssse3 }
           # -Ctarget-cpu=native sounds like bad-news if target != host
           - { target: i686-pc-windows-msvc, target_feature: native }
           - { target: i586-pc-windows-msvc, target_feature: native }
 
         include:
           # Populate the `matrix.os` field
-          - { target: x86_64-apple-darwin,      os: macos-latest }
           - { target: x86_64-unknown-linux-gnu, os: ubuntu-latest }
           - { target: x86_64-pc-windows-msvc,   os: windows-latest }
           - { target: i686-pc-windows-msvc,     os: windows-latest }
@@ -98,12 +85,9 @@ jobs:
           # avx512vl, but occasionally doesn't.  Maybe one day we can enable it.
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Setup Rust
-        run: |
-          rustup update nightly --no-self-update
-          rustup default nightly
-          rustup target add ${{ matrix.target }}
+        run: rustup target add ${{ matrix.target }}
 
       - name: Configure RUSTFLAGS
         shell: bash
@@ -145,6 +129,35 @@ jobs:
         run: cargo doc --verbose --target=${{ matrix.target }}
         env:
           RUSTDOCFLAGS: -Dwarnings
+    
+  macos-tests:
+    name: ${{ matrix.target }}
+    runs-on: macos-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        target:
+          - aarch64-apple-darwin
+          - x86_64-apple-darwin
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust
+        run: rustup target add ${{ matrix.target }}
+
+      - name: Configure RUSTFLAGS
+        shell: bash
+        run: echo "RUSTFLAGS=-Dwarnings" >> $GITHUB_ENV
+
+      - name: Test (debug)
+        run: cargo test --verbose --target=${{ matrix.target }}
+
+      - name: Test (release)
+        run: cargo test --verbose --target=${{ matrix.target }} --release
+
+      - name: Generate docs
+        run: cargo doc --verbose --target=${{ matrix.target }}
+        env:
+          RUSTDOCFLAGS: -Dwarnings
 
   wasm-tests:
     name: "wasm (firefox, ${{ matrix.name }})"
@@ -155,11 +168,7 @@ jobs:
           - { name: default, RUSTFLAGS: "" }
           - { name: simd128, RUSTFLAGS: "-C target-feature=+simd128" }
     steps:
-      - uses: actions/checkout@v2
-      - name: Setup Rust
-        run: |
-          rustup update nightly --no-self-update
-          rustup default nightly
+      - uses: actions/checkout@v4
       - name: Install wasm-pack
         run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
       - name: Test (debug)
@@ -174,6 +183,8 @@ jobs:
   cross-tests:
     name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
     runs-on: ubuntu-latest
+    env:
+      PROPTEST_CASES: 16
     strategy:
       fail-fast: false
 
@@ -185,6 +196,7 @@ jobs:
           - powerpc-unknown-linux-gnu
           - powerpc64le-unknown-linux-gnu       # includes altivec by default
           - riscv64gc-unknown-linux-gnu
+          - loongarch64-unknown-linux-gnu
           # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
           # non-nightly since https://github.com/rust-lang/rust/pull/113274
           # - mips-unknown-linux-gnu
@@ -201,24 +213,14 @@ jobs:
           # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Setup Rust
-        run: |
-          rustup update nightly --no-self-update
-          rustup default nightly
-          rustup target add ${{ matrix.target }}
-          rustup component add rust-src
+        run: rustup target add ${{ matrix.target }}
 
       - name: Install Cross
-        # Equivalent to `cargo install cross`, but downloading a prebuilt
-        # binary. Ideally we wouldn't hardcode a version, but the version number
-        # being part of the tarball means we can't just use the download/latest
-        # URL :(
+        # Install the latest git version for newer targets.
         run: |
-          CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
-          mkdir -p "$HOME/.bin"
-          curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
-          echo "$HOME/.bin" >> $GITHUB_PATH
+          cargo install cross --git https://github.com/cross-rs/cross --rev 4090beca3cfffa44371a5bba524de3a578aa46c3
 
       - name: Configure Emulated CPUs
         run: |
@@ -242,34 +244,11 @@ jobs:
       - name: Test (release)
         run: cross test --verbose --target=${{ matrix.target }} --release
 
-  features:
-    name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
+  miri:
     runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        simd:
-          - ""
-          - "avx512"
-        features:
-          - ""
-          - "--features std"
-          - "--features all_lane_counts"
-          - "--all-features"
-
+    env:
+      PROPTEST_CASES: 16
     steps:
-      - uses: actions/checkout@v2
-      - name: Setup Rust
-        run: |
-          rustup update nightly --no-self-update
-          rustup default nightly
-      - name: Detect AVX512
-        run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV
-      - name: Check build
-        if: ${{ matrix.simd == '' }}
-        run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }}
-      - name: Check AVX
-        if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }}
-        run: |
-          echo "Found AVX features: $CPU_FEATURE"
-          RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }}
+      - uses: actions/checkout@v4
+      - name: Test (Miri)
+        run: cargo miri test
diff --git a/library/portable-simd/.github/workflows/doc.yml b/library/portable-simd/.github/workflows/doc.yml
index 9d1fa66ccb5..22c2cb3f67f 100644
--- a/library/portable-simd/.github/workflows/doc.yml
+++ b/library/portable-simd/.github/workflows/doc.yml
@@ -12,7 +12,7 @@ jobs:
 
     steps:
       - name: Checkout Repository
-        uses: actions/checkout@v1
+        uses: actions/checkout@v4
 
       - name: Setup Rust
         run: |
diff --git a/library/portable-simd/.gitignore b/library/portable-simd/.gitignore
index ea8c4bf7f35..9673e52dcad 100644
--- a/library/portable-simd/.gitignore
+++ b/library/portable-simd/.gitignore
@@ -1 +1,2 @@
 /target
+git-subtree.sh
diff --git a/library/portable-simd/Cargo.toml b/library/portable-simd/Cargo.toml
index d1732aaec2f..21d4584a9f4 100644
--- a/library/portable-simd/Cargo.toml
+++ b/library/portable-simd/Cargo.toml
@@ -5,3 +5,9 @@ members = [
     "crates/std_float",
     "crates/test_helpers",
 ]
+
+[profile.test.package."*"]
+opt-level = 2
+
+[profile.test.package.test_helpers]
+opt-level = 2
diff --git a/library/portable-simd/Cross.toml b/library/portable-simd/Cross.toml
new file mode 100644
index 00000000000..d21e76b92dd
--- /dev/null
+++ b/library/portable-simd/Cross.toml
@@ -0,0 +1,2 @@
+[build.env]
+passthrough = ["PROPTEST_CASES"]
diff --git a/library/portable-simd/crates/core_simd/Cargo.toml b/library/portable-simd/crates/core_simd/Cargo.toml
index b4a8fd70f4c..a7a6d43b11d 100644
--- a/library/portable-simd/crates/core_simd/Cargo.toml
+++ b/library/portable-simd/crates/core_simd/Cargo.toml
@@ -9,10 +9,9 @@ categories = ["hardware-support", "no-std"]
 license = "MIT OR Apache-2.0"
 
 [features]
-default = ["as_crate"]
+default = ["as_crate", "std"]
 as_crate = []
 std = []
-all_lane_counts = []
 
 [target.'cfg(target_arch = "wasm32")'.dev-dependencies]
 wasm-bindgen = "0.2"
diff --git a/library/portable-simd/crates/core_simd/src/lane_count.rs b/library/portable-simd/crates/core_simd/src/lane_count.rs
index 4cd7265ed67..280b27bc9bc 100644
--- a/library/portable-simd/crates/core_simd/src/lane_count.rs
+++ b/library/portable-simd/crates/core_simd/src/lane_count.rs
@@ -33,10 +33,8 @@ macro_rules! supported_lane_count {
     };
 }
 
-supported_lane_count!(1, 2, 4, 8, 16, 32, 64);
-#[cfg(feature = "all_lane_counts")]
 supported_lane_count!(
-    3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
-    31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
-    56, 57, 58, 59, 60, 61, 62, 63
+    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+    27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+    51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
 );
diff --git a/library/portable-simd/crates/core_simd/src/lib.rs b/library/portable-simd/crates/core_simd/src/lib.rs
index 992a7705e3c..7f57847c9c2 100644
--- a/library/portable-simd/crates/core_simd/src/lib.rs
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
@@ -1,7 +1,6 @@
 #![no_std]
 #![feature(
-    const_refs_to_cell,
-    const_mut_refs,
+    const_eval_select,
     convert_float_to_int,
     core_intrinsics,
     decl_macro,
@@ -26,6 +25,7 @@
     all(target_arch = "arm", target_feature = "v7"),
     feature(stdarch_arm_neon_intrinsics)
 )]
+#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))]
 #![cfg_attr(
     any(target_arch = "powerpc", target_arch = "powerpc64"),
     feature(stdarch_powerpc)
diff --git a/library/portable-simd/crates/core_simd/src/masks.rs b/library/portable-simd/crates/core_simd/src/masks.rs
index 04de3a96827..b763a7c75a5 100644
--- a/library/portable-simd/crates/core_simd/src/masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@@ -308,48 +308,6 @@ where
         Self(mask_impl::Mask::from_bitmask_integer(bitmask))
     }
 
-    /// Creates a bitmask vector from a mask.
-    ///
-    /// Each bit is set if the corresponding element in the mask is `true`.
-    /// The remaining bits are unset.
-    ///
-    /// The bits are packed into the first N bits of the vector:
-    /// ```
-    /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
-    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::mask32x8;
-    /// let mask = mask32x8::from_array([true, false, true, false, false, false, true, false]);
-    /// assert_eq!(mask.to_bitmask_vector()[0], 0b01000101);
-    /// ```
-    #[inline]
-    #[must_use = "method returns a new integer and does not mutate the original value"]
-    pub fn to_bitmask_vector(self) -> Simd<u8, N> {
-        self.0.to_bitmask_vector()
-    }
-
-    /// Creates a mask from a bitmask vector.
-    ///
-    /// For each bit, if it is set, the corresponding element in the mask is set to `true`.
-    ///
-    /// The bits are packed into the first N bits of the vector:
-    /// ```
-    /// # #![feature(portable_simd)]
-    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
-    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
-    /// # use simd::{mask32x8, u8x8};
-    /// let bitmask = u8x8::from_array([0b01000101, 0, 0, 0, 0, 0, 0, 0]);
-    /// assert_eq!(
-    ///     mask32x8::from_bitmask_vector(bitmask),
-    ///     mask32x8::from_array([true, false, true, false, false, false, true, false]),
-    /// );
-    /// ```
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
-        Self(mask_impl::Mask::from_bitmask_vector(bitmask))
-    }
-
     /// Finds the index of the first set element.
     ///
     /// ```
diff --git a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
index 96c553426ee..db4312d5bf8 100644
--- a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
@@ -123,23 +123,6 @@ where
     }
 
     #[inline]
-    #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub fn to_bitmask_vector(self) -> Simd<u8, N> {
-        let mut bitmask = Simd::splat(0);
-        bitmask.as_mut_array()[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
-        bitmask
-    }
-
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
-        let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
-        let len = bytes.as_ref().len();
-        bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
-        Self(bytes, PhantomData)
-    }
-
-    #[inline]
     pub fn to_bitmask_integer(self) -> u64 {
         let mut bitmask = [0u8; 8];
         bitmask[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
diff --git a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
index 87f031a9f36..2d01946b574 100644
--- a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
@@ -141,62 +141,6 @@ where
     }
 
     #[inline]
-    #[must_use = "method returns a new vector and does not mutate the original value"]
-    pub fn to_bitmask_vector(self) -> Simd<u8, N> {
-        let mut bitmask = Simd::splat(0);
-
-        // Safety: Bytes is the right size array
-        unsafe {
-            // Compute the bitmask
-            let mut bytes: <LaneCount<N> as SupportedLaneCount>::BitMask =
-                core::intrinsics::simd::simd_bitmask(self.0);
-
-            // LLVM assumes bit order should match endianness
-            if cfg!(target_endian = "big") {
-                for x in bytes.as_mut() {
-                    *x = x.reverse_bits()
-                }
-                if N % 8 > 0 {
-                    bytes.as_mut()[N / 8] >>= 8 - N % 8;
-                }
-            }
-
-            bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref());
-        }
-
-        bitmask
-    }
-
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
-        let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
-
-        // Safety: Bytes is the right size array
-        unsafe {
-            let len = bytes.as_ref().len();
-            bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
-
-            // LLVM assumes bit order should match endianness
-            if cfg!(target_endian = "big") {
-                for x in bytes.as_mut() {
-                    *x = x.reverse_bits();
-                }
-                if N % 8 > 0 {
-                    bytes.as_mut()[N / 8] >>= 8 - N % 8;
-                }
-            }
-
-            // Compute the regular mask
-            Self::from_int_unchecked(core::intrinsics::simd::simd_select_bitmask(
-                bytes,
-                Self::splat(true).to_int(),
-                Self::splat(false).to_int(),
-            ))
-        }
-    }
-
-    #[inline]
     unsafe fn to_bitmask_impl<U: ReverseBits, const M: usize>(self) -> U
     where
         LaneCount<M>: SupportedLaneCount,
@@ -283,7 +227,7 @@ where
     }
 
     #[inline]
-    #[must_use = "method returns a new vector and does not mutate the original value"]
+    #[must_use = "method returns a new bool and does not mutate the original value"]
     pub fn all(self) -> bool {
         // Safety: use `self` as an integer vector
         unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) }
diff --git a/library/portable-simd/crates/core_simd/src/ops.rs b/library/portable-simd/crates/core_simd/src/ops.rs
index dd7303a97b1..d3bd14a3402 100644
--- a/library/portable-simd/crates/core_simd/src/ops.rs
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@@ -77,7 +77,7 @@ macro_rules! int_divrem_guard {
     (   $lhs:ident,
         $rhs:ident,
         {   const PANIC_ZERO: &'static str = $zero:literal;
-            $simd_call:ident
+            $simd_call:ident, $op:tt
         },
         $int:ident ) => {
         if $rhs.simd_eq(Simd::splat(0 as _)).any() {
@@ -96,8 +96,23 @@ macro_rules! int_divrem_guard {
                 // Nice base case to make it easy to const-fold away the other branch.
                 $rhs
             };
-            // Safety: $lhs and rhs are vectors
-            unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
+
+            // aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
+            // these operations aren't vectorized on aarch64 anyway
+            #[cfg(target_arch = "aarch64")]
+            {
+                let mut out = Simd::splat(0 as _);
+                for i in 0..Self::LEN {
+                    out[i] = $lhs[i] $op rhs[i];
+                }
+                out
+            }
+
+            #[cfg(not(target_arch = "aarch64"))]
+            {
+                // Safety: $lhs and rhs are vectors
+                unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
+            }
         }
     };
 }
@@ -205,14 +220,14 @@ for_base_ops! {
     impl Div::div {
         int_divrem_guard {
             const PANIC_ZERO: &'static str = "attempt to divide by zero";
-            simd_div
+            simd_div, /
         }
     }
 
     impl Rem::rem {
         int_divrem_guard {
             const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
-            simd_rem
+            simd_rem, %
         }
     }
 
diff --git a/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs b/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs
index 5b4615ce51d..93989ce91b8 100644
--- a/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs
@@ -12,7 +12,7 @@ pub trait SimdPartialEq {
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn simd_eq(self, other: Self) -> Self::Mask;
 
-    /// Test if each element is equal to the corresponding element in `other`.
+    /// Test if each element is not equal to the corresponding element in `other`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn simd_ne(self, other: Self) -> Self::Mask;
 }
diff --git a/library/portable-simd/crates/core_simd/src/simd/num/float.rs b/library/portable-simd/crates/core_simd/src/simd/num/float.rs
index 59e43851ea8..79954b937b3 100644
--- a/library/portable-simd/crates/core_simd/src/simd/num/float.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num/float.rs
@@ -255,6 +255,7 @@ macro_rules! impl_trait {
             type Bits = Simd<$bits_ty, N>;
             type Cast<T: SimdElement> = Simd<T, N>;
 
+            #[cfg(not(target_arch = "aarch64"))]
             #[inline]
             fn cast<T: SimdCast>(self) -> Self::Cast<T>
             {
@@ -262,6 +263,33 @@ macro_rules! impl_trait {
                 unsafe { core::intrinsics::simd::simd_as(self) }
             }
 
+            // https://github.com/llvm/llvm-project/issues/94694
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            fn cast<T: SimdCast>(self) -> Self::Cast<T>
+            {
+                const { assert!(N <= 64) };
+                if N <= 2 || N == 4 || N == 8 || N == 16 || N == 32 || N == 64 {
+                    // Safety: supported types are guaranteed by SimdCast
+                    unsafe { core::intrinsics::simd::simd_as(self) }
+                } else if N < 4 {
+                    let x = self.resize::<4>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                } else if N < 8 {
+                    let x = self.resize::<8>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                } else if N < 16 {
+                    let x = self.resize::<16>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                } else if N < 32 {
+                    let x = self.resize::<32>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                } else {
+                    let x = self.resize::<64>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                }
+            }
+
             #[inline]
             #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
             unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
@@ -391,7 +419,7 @@ macro_rules! impl_trait {
                     self.as_array().iter().sum()
                 } else {
                     // Safety: `self` is a float vector
-                    unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0.) }
+                    unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, -0.) }
                 }
             }
 
diff --git a/library/portable-simd/crates/core_simd/src/simd/num/int.rs b/library/portable-simd/crates/core_simd/src/simd/num/int.rs
index d7598d9ceaf..3a51235ff95 100644
--- a/library/portable-simd/crates/core_simd/src/simd/num/int.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num/int.rs
@@ -1,6 +1,6 @@
 use super::sealed::Sealed;
 use crate::simd::{
-    cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
+    cmp::SimdOrd, cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
     SupportedLaneCount,
 };
 
@@ -70,11 +70,27 @@ pub trait SimdInt: Copy + Sealed {
     /// # #[cfg(not(feature = "as_crate"))] use core::simd;
     /// # use simd::prelude::*;
     /// use core::i32::{MIN, MAX};
-    /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
+    /// let xs = Simd::from_array([MIN, MIN + 1, -5, 0]);
     /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
     /// ```
     fn abs(self) -> Self;
 
+    /// Lanewise absolute difference.
+    /// Every element becomes the absolute difference of `self` and `second`.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::prelude::*;
+    /// use core::i32::{MIN, MAX};
+    /// let a = Simd::from_array([MIN, MAX, 100, -100]);
+    /// let b = Simd::from_array([MAX, MIN, -80, -120]);
+    /// assert_eq!(a.abs_diff(b), Simd::from_array([u32::MAX, u32::MAX, 180, 20]));
+    /// ```
+    fn abs_diff(self, second: Self) -> Self::Unsigned;
+
     /// Lanewise saturating absolute value, implemented in Rust.
     /// As abs(), except the MIN value becomes MAX instead of itself.
     ///
@@ -203,6 +219,12 @@ pub trait SimdInt: Copy + Sealed {
     /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
     fn reverse_bits(self) -> Self;
 
+    /// Returns the number of ones in the binary representation of each element.
+    fn count_ones(self) -> Self::Unsigned;
+
+    /// Returns the number of zeros in the binary representation of each element.
+    fn count_zeros(self) -> Self::Unsigned;
+
     /// Returns the number of leading zeros in the binary representation of each element.
     fn leading_zeros(self) -> Self::Unsigned;
 
@@ -260,6 +282,13 @@ macro_rules! impl_trait {
             }
 
             #[inline]
+            fn abs_diff(self, second: Self) -> Self::Unsigned {
+                let max = self.simd_max(second);
+                let min = self.simd_min(second);
+                (max - min).cast()
+            }
+
+            #[inline]
             fn saturating_abs(self) -> Self {
                 // arith shift for -1 or 0 mask based on sign bit, giving 2s complement
                 const SHR: $ty = <$ty>::BITS as $ty - 1;
@@ -345,6 +374,16 @@ macro_rules! impl_trait {
             }
 
             #[inline]
+            fn count_ones(self) -> Self::Unsigned {
+                self.cast::<$unsigned>().count_ones()
+            }
+
+            #[inline]
+            fn count_zeros(self) -> Self::Unsigned {
+                self.cast::<$unsigned>().count_zeros()
+            }
+
+            #[inline]
             fn leading_zeros(self) -> Self::Unsigned {
                 self.cast::<$unsigned>().leading_zeros()
             }
diff --git a/library/portable-simd/crates/core_simd/src/simd/num/uint.rs b/library/portable-simd/crates/core_simd/src/simd/num/uint.rs
index 53dd97f501c..1ab2d8c7b73 100644
--- a/library/portable-simd/crates/core_simd/src/simd/num/uint.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num/uint.rs
@@ -1,5 +1,5 @@
 use super::sealed::Sealed;
-use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
+use crate::simd::{cmp::SimdOrd, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
 
 /// Operations on SIMD vectors of unsigned integers.
 pub trait SimdUint: Copy + Sealed {
@@ -57,6 +57,22 @@ pub trait SimdUint: Copy + Sealed {
     /// assert_eq!(sat, Simd::splat(0));
     fn saturating_sub(self, second: Self) -> Self;
 
+    /// Lanewise absolute difference.
+    /// Every element becomes the absolute difference of `self` and `second`.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::prelude::*;
+    /// use core::u32::MAX;
+    /// let a = Simd::from_array([0, MAX, 100, 20]);
+    /// let b = Simd::from_array([MAX, 0, 80, 200]);
+    /// assert_eq!(a.abs_diff(b), Simd::from_array([MAX, MAX, 20, 180]));
+    /// ```
+    fn abs_diff(self, second: Self) -> Self;
+
     /// Returns the sum of the elements of the vector, with wrapping addition.
     fn reduce_sum(self) -> Self::Scalar;
 
@@ -85,6 +101,12 @@ pub trait SimdUint: Copy + Sealed {
     /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
     fn reverse_bits(self) -> Self;
 
+    /// Returns the number of ones in the binary representation of each element.
+    fn count_ones(self) -> Self;
+
+    /// Returns the number of zeros in the binary representation of each element.
+    fn count_zeros(self) -> Self;
+
     /// Returns the number of leading zeros in the binary representation of each element.
     fn leading_zeros(self) -> Self;
 
@@ -139,6 +161,13 @@ macro_rules! impl_trait {
             }
 
             #[inline]
+            fn abs_diff(self, second: Self) -> Self {
+                let max = self.simd_max(second);
+                let min = self.simd_min(second);
+                max - min
+            }
+
+            #[inline]
             fn reduce_sum(self) -> Self::Scalar {
                 // Safety: `self` is an integer vector
                 unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0) }
@@ -193,6 +222,17 @@ macro_rules! impl_trait {
             }
 
             #[inline]
+            fn count_ones(self) -> Self {
+                // Safety: `self` is an integer vector
+                unsafe { core::intrinsics::simd::simd_ctpop(self) }
+            }
+
+            #[inline]
+            fn count_zeros(self) -> Self {
+                (!self).count_ones()
+            }
+
+            #[inline]
             fn leading_zeros(self) -> Self {
                 // Safety: `self` is an integer vector
                 unsafe { core::intrinsics::simd::simd_ctlz(self) }
diff --git a/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs b/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs
index be635ea640b..47383809ffb 100644
--- a/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs
@@ -42,6 +42,19 @@ pub trait SimdConstPtr: Copy + Sealed {
     /// Equivalent to calling [`pointer::addr`] on each element.
     fn addr(self) -> Self::Usize;
 
+    /// Converts an address to a pointer without giving it any provenance.
+    ///
+    /// Without provenance, this pointer is not associated with any actual allocation. Such a
+    /// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but
+    /// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers
+    /// are little more than a usize address in disguise.
+    ///
+    /// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a
+    /// previously exposed provenance.
+    ///
+    /// Equivalent to calling [`core::ptr::without_provenance`] on each element.
+    fn without_provenance(addr: Self::Usize) -> Self;
+
     /// Creates a new pointer with the given address.
     ///
     /// This performs the same operation as a cast, but copies the *address-space* and
@@ -119,6 +132,14 @@ where
     }
 
     #[inline]
+    fn without_provenance(addr: Self::Usize) -> Self {
+        // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+        // SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any
+        // provenance).
+        unsafe { core::mem::transmute_copy(&addr) }
+    }
+
+    #[inline]
     fn with_addr(self, addr: Self::Usize) -> Self {
         // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
         //
diff --git a/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs b/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs
index f6823a949e3..3f20eef21a3 100644
--- a/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs
@@ -39,6 +39,19 @@ pub trait SimdMutPtr: Copy + Sealed {
     /// Equivalent to calling [`pointer::addr`] on each element.
     fn addr(self) -> Self::Usize;
 
+    /// Converts an address to a pointer without giving it any provenance.
+    ///
+    /// Without provenance, this pointer is not associated with any actual allocation. Such a
+    /// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but
+    /// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers
+    /// are little more than a usize address in disguise.
+    ///
+    /// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a
+    /// previously exposed provenance.
+    ///
+    /// Equivalent to calling [`core::ptr::without_provenance`] on each element.
+    fn without_provenance(addr: Self::Usize) -> Self;
+
     /// Creates a new pointer with the given address.
     ///
     /// This performs the same operation as a cast, but copies the *address-space* and
@@ -116,6 +129,14 @@ where
     }
 
     #[inline]
+    fn without_provenance(addr: Self::Usize) -> Self {
+        // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+        // SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any
+        // provenance).
+        unsafe { core::mem::transmute_copy(&addr) }
+    }
+
+    #[inline]
     fn with_addr(self, addr: Self::Usize) -> Self {
         // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
         //
diff --git a/library/portable-simd/crates/core_simd/src/swizzle.rs b/library/portable-simd/crates/core_simd/src/swizzle.rs
index d62642fb906..42425ef37e5 100644
--- a/library/portable-simd/crates/core_simd/src/swizzle.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle.rs
@@ -155,8 +155,7 @@ pub trait Swizzle<const N: usize> {
 
     /// Creates a new mask from the elements of `mask`.
     ///
-    /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of
-    /// `first` and `second`.
+    /// Element `i` of the output is `mask[Self::INDEX[i]]`.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original inputs"]
     fn swizzle_mask<T, const M: usize>(mask: Mask<T, M>) -> Mask<T, N>
@@ -260,6 +259,50 @@ where
         Rotate::<OFFSET>::swizzle(self)
     }
 
+    /// Shifts the vector elements to the left by `OFFSET`, filling in with
+    /// `padding` from the right.
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn shift_elements_left<const OFFSET: usize>(self, padding: T) -> Self {
+        struct Shift<const OFFSET: usize>;
+
+        impl<const OFFSET: usize, const N: usize> Swizzle<N> for Shift<OFFSET> {
+            const INDEX: [usize; N] = const {
+                let mut index = [N; N];
+                let mut i = 0;
+                while i + OFFSET < N {
+                    index[i] = i + OFFSET;
+                    i += 1;
+                }
+                index
+            };
+        }
+
+        Shift::<OFFSET>::concat_swizzle(self, Simd::splat(padding))
+    }
+
+    /// Shifts the vector elements to the right by `OFFSET`, filling in with
+    /// `padding` from the left.
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn shift_elements_right<const OFFSET: usize>(self, padding: T) -> Self {
+        struct Shift<const OFFSET: usize>;
+
+        impl<const OFFSET: usize, const N: usize> Swizzle<N> for Shift<OFFSET> {
+            const INDEX: [usize; N] = const {
+                let mut index = [N; N];
+                let mut i = OFFSET;
+                while i < N {
+                    index[i] = i - OFFSET;
+                    i += 1;
+                }
+                index
+            };
+        }
+
+        Shift::<OFFSET>::concat_swizzle(self, Simd::splat(padding))
+    }
+
     /// Interleave two vectors.
     ///
     /// The resulting vectors contain elements taken alternatively from `self` and `other`, first
@@ -320,7 +363,9 @@ where
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::Simd;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::Simd;
     /// let a = Simd::from_array([0, 4, 1, 5]);
     /// let b = Simd::from_array([2, 6, 3, 7]);
     /// let (x, y) = a.deinterleave(b);
@@ -391,4 +436,210 @@ where
         }
         Resize::<N>::concat_swizzle(self, Simd::splat(value))
     }
+
+    /// Extract a vector from another vector.
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::u32x4;
+    /// let x = u32x4::from_array([0, 1, 2, 3]);
+    /// assert_eq!(x.extract::<1, 2>().to_array(), [1, 2]);
+    /// ```
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn extract<const START: usize, const LEN: usize>(self) -> Simd<T, LEN>
+    where
+        LaneCount<LEN>: SupportedLaneCount,
+    {
+        struct Extract<const N: usize, const START: usize>;
+        impl<const N: usize, const START: usize, const LEN: usize> Swizzle<LEN> for Extract<N, START> {
+            const INDEX: [usize; LEN] = const {
+                assert!(START + LEN <= N, "index out of bounds");
+                let mut index = [0; LEN];
+                let mut i = 0;
+                while i < LEN {
+                    index[i] = START + i;
+                    i += 1;
+                }
+                index
+            };
+        }
+        Extract::<N, START>::swizzle(self)
+    }
+}
+
+impl<T, const N: usize> Mask<T, N>
+where
+    T: MaskElement,
+    LaneCount<N>: SupportedLaneCount,
+{
+    /// Reverse the order of the elements in the mask.
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn reverse(self) -> Self {
+        // Safety: swizzles are safe for masks
+        unsafe { Self::from_int_unchecked(self.to_int().reverse()) }
+    }
+
+    /// Rotates the mask such that the first `OFFSET` elements of the slice move to the end
+    /// while the last `self.len() - OFFSET` elements move to the front. After calling `rotate_elements_left`,
+    /// the element previously at index `OFFSET` will become the first element in the slice.
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn rotate_elements_left<const OFFSET: usize>(self) -> Self {
+        // Safety: swizzles are safe for masks
+        unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_left::<OFFSET>()) }
+    }
+
+    /// Rotates the mask such that the first `self.len() - OFFSET` elements of the mask move to
+    /// the end while the last `OFFSET` elements move to the front. After calling `rotate_elements_right`,
+    /// the element previously at index `self.len() - OFFSET` will become the first element in the slice.
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn rotate_elements_right<const OFFSET: usize>(self) -> Self {
+        // Safety: swizzles are safe for masks
+        unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_right::<OFFSET>()) }
+    }
+
+    /// Shifts the mask elements to the left by `OFFSET`, filling in with
+    /// `padding` from the right.
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original inputs"]
+    pub fn shift_elements_left<const OFFSET: usize>(self, padding: bool) -> Self {
+        // Safety: swizzles are safe for masks
+        unsafe {
+            Self::from_int_unchecked(self.to_int().shift_elements_left::<OFFSET>(if padding {
+                T::TRUE
+            } else {
+                T::FALSE
+            }))
+        }
+    }
+
+    /// Shifts the mask elements to the right by `OFFSET`, filling in with
+    /// `padding` from the left.
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original inputs"]
+    pub fn shift_elements_right<const OFFSET: usize>(self, padding: bool) -> Self {
+        // Safety: swizzles are safe for masks
+        unsafe {
+            Self::from_int_unchecked(self.to_int().shift_elements_right::<OFFSET>(if padding {
+                T::TRUE
+            } else {
+                T::FALSE
+            }))
+        }
+    }
+
+    /// Interleave two masks.
+    ///
+    /// The resulting masks contain elements taken alternatively from `self` and `other`, first
+    /// filling the first result, and then the second.
+    ///
+    /// The reverse of this operation is [`Mask::deinterleave`].
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::mask32x4;
+    /// let a = mask32x4::from_array([false, true, false, true]);
+    /// let b = mask32x4::from_array([false, false, true, true]);
+    /// let (x, y) = a.interleave(b);
+    /// assert_eq!(x.to_array(), [false, false, true, false]);
+    /// assert_eq!(y.to_array(), [false, true, true, true]);
+    /// ```
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn interleave(self, other: Self) -> (Self, Self) {
+        let (lo, hi) = self.to_int().interleave(other.to_int());
+        // Safety: swizzles are safe for masks
+        unsafe { (Self::from_int_unchecked(lo), Self::from_int_unchecked(hi)) }
+    }
+
+    /// Deinterleave two masks.
+    ///
+    /// The first result takes every other element of `self` and then `other`, starting with
+    /// the first element.
+    ///
+    /// The second result takes every other element of `self` and then `other`, starting with
+    /// the second element.
+    ///
+    /// The reverse of this operation is [`Mask::interleave`].
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::mask32x4;
+    /// let a = mask32x4::from_array([false, true, false, true]);
+    /// let b = mask32x4::from_array([false, false, true, true]);
+    /// let (x, y) = a.deinterleave(b);
+    /// assert_eq!(x.to_array(), [false, false, false, true]);
+    /// assert_eq!(y.to_array(), [true, true, false, true]);
+    /// ```
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn deinterleave(self, other: Self) -> (Self, Self) {
+        let (even, odd) = self.to_int().deinterleave(other.to_int());
+        // Safety: swizzles are safe for masks
+        unsafe {
+            (
+                Self::from_int_unchecked(even),
+                Self::from_int_unchecked(odd),
+            )
+        }
+    }
+
+    /// Resize a mask.
+    ///
+    /// If `M` > `N`, extends the length of a mask, setting the new elements to `value`.
+    /// If `M` < `N`, truncates the mask to the first `M` elements.
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::mask32x4;
+    /// let x = mask32x4::from_array([false, true, true, false]);
+    /// assert_eq!(x.resize::<8>(true).to_array(), [false, true, true, false, true, true, true, true]);
+    /// assert_eq!(x.resize::<2>(true).to_array(), [false, true]);
+    /// ```
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn resize<const M: usize>(self, value: bool) -> Mask<T, M>
+    where
+        LaneCount<M>: SupportedLaneCount,
+    {
+        // Safety: swizzles are safe for masks
+        unsafe {
+            Mask::<T, M>::from_int_unchecked(self.to_int().resize::<M>(if value {
+                T::TRUE
+            } else {
+                T::FALSE
+            }))
+        }
+    }
+
+    /// Extract a vector from another vector.
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::mask32x4;
+    /// let x = mask32x4::from_array([false, true, true, false]);
+    /// assert_eq!(x.extract::<1, 2>().to_array(), [true, true]);
+    /// ```
+    #[inline]
+    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+    pub fn extract<const START: usize, const LEN: usize>(self) -> Mask<T, LEN>
+    where
+        LaneCount<LEN>: SupportedLaneCount,
+    {
+        // Safety: swizzles are safe for masks
+        unsafe { Mask::<T, LEN>::from_int_unchecked(self.to_int().extract::<START, LEN>()) }
+    }
 }
diff --git a/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs b/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
index 3b6388d0f27..773bd028bae 100644
--- a/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
@@ -59,15 +59,40 @@ where
                     target_endian = "little"
                 ))]
                 16 => transize(vqtbl1q_u8, self, idxs),
+                #[cfg(all(
+                    target_arch = "arm",
+                    target_feature = "v7",
+                    target_feature = "neon",
+                    target_endian = "little"
+                ))]
+                16 => transize(armv7_neon_swizzle_u8x16, self, idxs),
                 #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))]
                 32 => transize(avx2_pshufb, self, idxs),
                 #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
-                32 => transize(x86::_mm256_permutexvar_epi8, zeroing_idxs(idxs), self),
-                // Notable absence: avx512bw shuffle
-                // If avx512bw is available, odds of avx512vbmi are good
-                // FIXME: initial AVX512VBMI variant didn't actually pass muster
-                // #[cfg(target_feature = "avx512vbmi")]
-                // 64 => transize(x86::_mm512_permutexvar_epi8, self, idxs),
+                32 => {
+                    // Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit
+                    let swizzler = |bytes, idxs| {
+                        let mask = x86::_mm256_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>(
+                            idxs,
+                            Simd::<u8, 32>::splat(N as u8).into(),
+                        );
+                        x86::_mm256_maskz_permutexvar_epi8(mask, idxs, bytes)
+                    };
+                    transize(swizzler, self, idxs)
+                }
+                // Notable absence: avx512bw pshufb shuffle
+                #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
+                64 => {
+                    // Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit
+                    let swizzler = |bytes, idxs| {
+                        let mask = x86::_mm512_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>(
+                            idxs,
+                            Simd::<u8, 64>::splat(N as u8).into(),
+                        );
+                        x86::_mm512_maskz_permutexvar_epi8(mask, idxs, bytes)
+                    };
+                    transize(swizzler, self, idxs)
+                }
                 _ => {
                     let mut array = [0; N];
                     for (i, k) in idxs.to_array().into_iter().enumerate() {
@@ -82,6 +107,28 @@ where
     }
 }
 
+/// armv7 neon supports swizzling `u8x16` by swizzling two u8x8 blocks
+/// with a u8x8x2 lookup table.
+///
+/// # Safety
+/// This requires armv7 neon to work
+#[cfg(all(
+    target_arch = "arm",
+    target_feature = "v7",
+    target_feature = "neon",
+    target_endian = "little"
+))]
+unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd<u8, 16>, idxs: Simd<u8, 16>) -> Simd<u8, 16> {
+    use core::arch::arm::{uint8x8x2_t, vcombine_u8, vget_high_u8, vget_low_u8, vtbl2_u8};
+    // SAFETY: Caller promised arm neon support
+    unsafe {
+        let bytes = uint8x8x2_t(vget_low_u8(bytes.into()), vget_high_u8(bytes.into()));
+        let lo = vtbl2_u8(bytes, vget_low_u8(idxs.into()));
+        let hi = vtbl2_u8(bytes, vget_high_u8(idxs.into()));
+        vcombine_u8(lo, hi).into()
+    }
+}
+
 /// "vpshufb like it was meant to be" on AVX2
 ///
 /// # Safety
diff --git a/library/portable-simd/crates/core_simd/src/vector.rs b/library/portable-simd/crates/core_simd/src/vector.rs
index 3e239169149..9c4dd36c24f 100644
--- a/library/portable-simd/crates/core_simd/src/vector.rs
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@@ -99,7 +99,7 @@ use crate::simd::{
 // directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be
 // avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also
 // causes rustc to emit illegal LLVM IR in some cases.
-#[repr(simd)]
+#[repr(simd, packed)]
 pub struct Simd<T, const N: usize>([T; N])
 where
     LaneCount<N>: SupportedLaneCount,
@@ -144,14 +144,32 @@ where
     /// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
     /// ```
     #[inline]
-    pub fn splat(value: T) -> Self {
-        // This is preferred over `[value; N]`, since it's explicitly a splat:
-        // https://github.com/rust-lang/rust/issues/97804
-        struct Splat;
-        impl<const N: usize> Swizzle<N> for Splat {
-            const INDEX: [usize; N] = [0; N];
+    #[rustc_const_unstable(feature = "portable_simd", issue = "86656")]
+    pub const fn splat(value: T) -> Self {
+        const fn splat_const<T, const N: usize>(value: T) -> Simd<T, N>
+        where
+            T: SimdElement,
+            LaneCount<N>: SupportedLaneCount,
+        {
+            Simd::from_array([value; N])
         }
-        Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
+
+        fn splat_rt<T, const N: usize>(value: T) -> Simd<T, N>
+        where
+            T: SimdElement,
+            LaneCount<N>: SupportedLaneCount,
+        {
+            // This is preferred over `[value; N]`, since it's explicitly a splat:
+            // https://github.com/rust-lang/rust/issues/97804
+            struct Splat;
+            impl<const N: usize> Swizzle<N> for Splat {
+                const INDEX: [usize; N] = [0; N];
+            }
+
+            Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
+        }
+
+        core::intrinsics::const_eval_select((value,), splat_const, splat_rt)
     }
 
     /// Returns an array reference containing the entire SIMD vector.
@@ -425,6 +443,9 @@ where
     ///
     /// When the element is disabled, that memory location is not accessed and the corresponding
     /// value from `or` is passed through.
+    ///
+    /// # Safety
+    /// Enabled loads must not exceed the length of `slice`.
     #[must_use]
     #[inline]
     pub unsafe fn load_select_unchecked(
@@ -442,6 +463,9 @@ where
     ///
     /// When the element is disabled, that memory location is not accessed and the corresponding
     /// value from `or` is passed through.
+    ///
+    /// # Safety
+    /// Enabled `ptr` elements must be safe to read as if by `std::ptr::read`.
     #[must_use]
     #[inline]
     pub unsafe fn load_select_ptr(
@@ -924,6 +948,7 @@ where
     }
 }
 
+/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
 impl<T, const N: usize> PartialOrd for Simd<T, N>
 where
     LaneCount<N>: SupportedLaneCount,
@@ -943,6 +968,7 @@ where
 {
 }
 
+/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
 impl<T, const N: usize> Ord for Simd<T, N>
 where
     LaneCount<N>: SupportedLaneCount,
@@ -1195,6 +1221,7 @@ fn lane_indices<const N: usize>() -> Simd<usize, N>
 where
     LaneCount<N>: SupportedLaneCount,
 {
+    #![allow(clippy::needless_range_loop)]
     let mut index = [0; N];
     for i in 0..N {
         index[i] = i;
diff --git a/library/portable-simd/crates/core_simd/src/vendor.rs b/library/portable-simd/crates/core_simd/src/vendor.rs
index 1a34a3a8de5..57536e4fc77 100644
--- a/library/portable-simd/crates/core_simd/src/vendor.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor.rs
@@ -29,3 +29,6 @@ mod arm;
 
 #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
 mod powerpc;
+
+#[cfg(target_arch = "loongarch64")]
+mod loongarch64;
diff --git a/library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs b/library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs
new file mode 100644
index 00000000000..1290bc166b2
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs
@@ -0,0 +1,31 @@
+use crate::simd::*;
+use core::arch::loongarch64::*;
+
+from_transmute! { unsafe u8x16 => v16u8 }
+from_transmute! { unsafe u8x32 => v32u8 }
+from_transmute! { unsafe i8x16 => v16i8 }
+from_transmute! { unsafe i8x32 => v32i8 }
+
+from_transmute! { unsafe u16x8 => v8u16 }
+from_transmute! { unsafe u16x16 => v16u16 }
+from_transmute! { unsafe i16x8 => v8i16 }
+from_transmute! { unsafe i16x16 => v16i16 }
+
+from_transmute! { unsafe u32x4 => v4u32 }
+from_transmute! { unsafe u32x8 => v8u32 }
+from_transmute! { unsafe i32x4 => v4i32 }
+from_transmute! { unsafe i32x8 => v8i32 }
+from_transmute! { unsafe f32x4 => v4f32 }
+from_transmute! { unsafe f32x8 => v8f32 }
+
+from_transmute! { unsafe u64x2 => v2u64 }
+from_transmute! { unsafe u64x4 => v4u64 }
+from_transmute! { unsafe i64x2 => v2i64 }
+from_transmute! { unsafe i64x4 => v4i64 }
+from_transmute! { unsafe f64x2 => v2f64 }
+from_transmute! { unsafe f64x4 => v4f64 }
+
+from_transmute! { unsafe usizex2 => v2u64 }
+from_transmute! { unsafe usizex4 => v4u64 }
+from_transmute! { unsafe isizex2 => v2i64 }
+from_transmute! { unsafe isizex4 => v4i64 }
diff --git a/library/portable-simd/crates/core_simd/tests/layout.rs b/library/portable-simd/crates/core_simd/tests/layout.rs
new file mode 100644
index 00000000000..24114c2d261
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/layout.rs
@@ -0,0 +1,35 @@
+#![feature(portable_simd)]
+
+macro_rules! layout_tests {
+    { $($mod:ident, $ty:ty,)* } => {
+        $(
+        mod $mod {
+            test_helpers::test_lanes! {
+                fn no_padding<const LANES: usize>() {
+                    assert_eq!(
+                        core::mem::size_of::<core_simd::simd::Simd::<$ty, LANES>>(),
+                        core::mem::size_of::<[$ty; LANES]>(),
+                    );
+                }
+            }
+        }
+        )*
+    }
+}
+
+layout_tests! {
+    i8, i8,
+    i16, i16,
+    i32, i32,
+    i64, i64,
+    isize, isize,
+    u8, u8,
+    u16, u16,
+    u32, u32,
+    u64, u64,
+    usize, usize,
+    f32, f32,
+    f64, f64,
+    mut_ptr, *mut (),
+    const_ptr, *const (),
+}
diff --git a/library/portable-simd/crates/core_simd/tests/masks.rs b/library/portable-simd/crates/core_simd/tests/masks.rs
index fc6a3476b7c..48786d02440 100644
--- a/library/portable-simd/crates/core_simd/tests/masks.rs
+++ b/library/portable-simd/crates/core_simd/tests/masks.rs
@@ -99,7 +99,6 @@ macro_rules! test_mask_api {
                 assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask);
             }
 
-            #[cfg(feature = "all_lane_counts")]
             #[test]
             fn roundtrip_bitmask_conversion_odd() {
                 let values = [
@@ -134,48 +133,6 @@ macro_rules! test_mask_api {
                 cast_impl::<i64>();
                 cast_impl::<isize>();
             }
-
-            #[test]
-            fn roundtrip_bitmask_vector_conversion() {
-                use core_simd::simd::ToBytes;
-                let values = [
-                    true, false, false, true, false, false, true, false,
-                    true, true, false, false, false, false, false, true,
-                ];
-                let mask = Mask::<$type, 16>::from_array(values);
-                let bitmask = mask.to_bitmask_vector();
-                assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]);
-                assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask);
-            }
-
-            // rust-lang/portable-simd#379
-            #[test]
-            fn roundtrip_bitmask_vector_conversion_small() {
-                use core_simd::simd::ToBytes;
-                let values = [
-                    true, false, true, true
-                ];
-                let mask = Mask::<$type, 4>::from_array(values);
-                let bitmask = mask.to_bitmask_vector();
-                assert_eq!(bitmask.resize::<1>(0).to_ne_bytes()[0], 0b00001101);
-                assert_eq!(Mask::<$type, 4>::from_bitmask_vector(bitmask), mask);
-            }
-
-            /* FIXME doesn't work with non-powers-of-two, yet
-            // rust-lang/portable-simd#379
-            #[cfg(feature = "all_lane_counts")]
-            #[test]
-            fn roundtrip_bitmask_vector_conversion_odd() {
-                use core_simd::simd::ToBytes;
-                let values = [
-                    true, false, true, false, true, true, false, false, false, true, true,
-                ];
-                let mask = Mask::<$type, 11>::from_array(values);
-                let bitmask = mask.to_bitmask_vector();
-                assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b00110101, 0b00000110]);
-                assert_eq!(Mask::<$type, 11>::from_bitmask_vector(bitmask), mask);
-            }
-            */
         }
     }
 }
diff --git a/library/portable-simd/crates/core_simd/tests/ops_macros.rs b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
index aa565a13752..6de78f51e59 100644
--- a/library/portable-simd/crates/core_simd/tests/ops_macros.rs
+++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
@@ -216,6 +216,22 @@ macro_rules! impl_common_integer_tests {
                 )
             }
 
+            fn count_ones<const LANES: usize>() {
+                test_helpers::test_unary_elementwise(
+                    &$vector::<LANES>::count_ones,
+                    &|x| x.count_ones() as _,
+                    &|_| true,
+                )
+            }
+
+            fn count_zeros<const LANES: usize>() {
+                test_helpers::test_unary_elementwise(
+                    &$vector::<LANES>::count_zeros,
+                    &|x| x.count_zeros() as _,
+                    &|_| true,
+                )
+            }
+
             fn leading_zeros<const LANES: usize>() {
                 test_helpers::test_unary_elementwise(
                     &$vector::<LANES>::leading_zeros,
@@ -307,6 +323,14 @@ macro_rules! impl_signed_tests {
                     assert_eq!(a % b, Vector::<LANES>::splat(0));
                 }
 
+                fn abs_diff<const LANES: usize>() {
+                    test_helpers::test_binary_elementwise(
+                        &Vector::<LANES>::abs_diff,
+                        &Scalar::abs_diff,
+                        &|_, _| true,
+                    )
+                }
+
                 fn simd_min<const LANES: usize>() {
                     use core_simd::simd::cmp::SimdOrd;
                     let a = Vector::<LANES>::splat(Scalar::MIN);
@@ -419,6 +443,14 @@ macro_rules! impl_unsigned_tests {
                         &|_| true,
                     );
                 }
+
+                fn abs_diff<const LANES: usize>() {
+                    test_helpers::test_binary_elementwise(
+                        &Vector::<LANES>::abs_diff,
+                        &Scalar::abs_diff,
+                        &|_, _| true,
+                    )
+                }
             }
 
             impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
@@ -495,6 +527,9 @@ macro_rules! impl_float_tests {
                 }
 
                 fn is_normal<const LANES: usize>() {
+                    // Arm v7 Neon violates float opsem re: subnormals, see
+                    // https://github.com/rust-lang/portable-simd/issues/439
+                    #[cfg(not(target_arch = "arm"))]
                     test_helpers::test_unary_mask_elementwise(
                         &Vector::<LANES>::is_normal,
                         &Scalar::is_normal,
@@ -503,6 +538,9 @@ macro_rules! impl_float_tests {
                 }
 
                 fn is_subnormal<const LANES: usize>() {
+                    // Arm v7 Neon violates float opsem re: subnormals, see
+                    // https://github.com/rust-lang/portable-simd/issues/439
+                    #[cfg(not(target_arch = "arm"))]
                     test_helpers::test_unary_mask_elementwise(
                         &Vector::<LANES>::is_subnormal,
                         &Scalar::is_subnormal,
diff --git a/library/portable-simd/crates/core_simd/tests/swizzle.rs b/library/portable-simd/crates/core_simd/tests/swizzle.rs
index 522d71439b7..7001e5f6bf8 100644
--- a/library/portable-simd/crates/core_simd/tests/swizzle.rs
+++ b/library/portable-simd/crates/core_simd/tests/swizzle.rs
@@ -50,6 +50,24 @@ fn rotate() {
 
 #[test]
 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn shift() {
+    let a = Simd::from_array([1, 2, 3, 4]);
+    assert_eq!(a.shift_elements_left::<0>(0).to_array(), [1, 2, 3, 4]);
+    assert_eq!(a.shift_elements_left::<1>(0).to_array(), [2, 3, 4, 0]);
+    assert_eq!(a.shift_elements_left::<2>(9).to_array(), [3, 4, 9, 9]);
+    assert_eq!(a.shift_elements_left::<3>(8).to_array(), [4, 8, 8, 8]);
+    assert_eq!(a.shift_elements_left::<4>(7).to_array(), [7, 7, 7, 7]);
+    assert_eq!(a.shift_elements_left::<5>(6).to_array(), [6, 6, 6, 6]);
+    assert_eq!(a.shift_elements_right::<0>(0).to_array(), [1, 2, 3, 4]);
+    assert_eq!(a.shift_elements_right::<1>(0).to_array(), [0, 1, 2, 3]);
+    assert_eq!(a.shift_elements_right::<2>(-1).to_array(), [-1, -1, 1, 2]);
+    assert_eq!(a.shift_elements_right::<3>(-2).to_array(), [-2, -2, -2, 1]);
+    assert_eq!(a.shift_elements_right::<4>(-3).to_array(), [-3, -3, -3, -3]);
+    assert_eq!(a.shift_elements_right::<5>(-4).to_array(), [-4, -4, -4, -4]);
+}
+
+#[test]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 fn interleave() {
     let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
     let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]);
diff --git a/library/portable-simd/crates/test_helpers/Cargo.toml b/library/portable-simd/crates/test_helpers/Cargo.toml
index 23dae7c9338..a5359b9abc8 100644
--- a/library/portable-simd/crates/test_helpers/Cargo.toml
+++ b/library/portable-simd/crates/test_helpers/Cargo.toml
@@ -6,6 +6,3 @@ publish = false
 
 [dependencies]
 proptest = { version = "0.10", default-features = false, features = ["alloc"] }
-
-[features]
-all_lane_counts = []
diff --git a/library/portable-simd/crates/test_helpers/src/lib.rs b/library/portable-simd/crates/test_helpers/src/lib.rs
index 51b860a8635..197c920e11e 100644
--- a/library/portable-simd/crates/test_helpers/src/lib.rs
+++ b/library/portable-simd/crates/test_helpers/src/lib.rs
@@ -539,32 +539,22 @@ macro_rules! test_lanes {
                     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
                     lanes_1 1;
                     lanes_2 2;
-                    lanes_4 4;
-                );
-
-                #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
-                $crate::test_lanes_helper!(
-                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
-                    lanes_8 8;
-                    lanes_16 16;
-                    lanes_32 32;
-                    lanes_64 64;
-                );
-
-                #[cfg(feature = "all_lane_counts")]
-                $crate::test_lanes_helper!(
-                    // test some odd and even non-power-of-2 lengths on miri
-                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
+                    // Cover an odd and an even non-power-of-2 length in Miri.
+                    // (Even non-power-of-2 vectors have alignment between element
+                    // and vector size, so we want to cover that case as well.)
                     lanes_3 3;
-                    lanes_5 5;
+
                     lanes_6 6;
                 );
 
-                #[cfg(feature = "all_lane_counts")]
                 #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
                 $crate::test_lanes_helper!(
                     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
+                    lanes_4 4;
+                    lanes_5 5;
+
                     lanes_7 7;
+                    lanes_8 8;
                     lanes_9 9;
                     lanes_10 10;
                     lanes_11 11;
@@ -572,52 +562,55 @@ macro_rules! test_lanes {
                     lanes_13 13;
                     lanes_14 14;
                     lanes_15 15;
+                    lanes_16 16;
                     lanes_17 17;
-                    lanes_18 18;
-                    lanes_19 19;
-                    lanes_20 20;
-                    lanes_21 21;
-                    lanes_22 22;
-                    lanes_23 23;
+                    //lanes_18 18;
+                    //lanes_19 19;
+                    //lanes_20 20;
+                    //lanes_21 21;
+                    //lanes_22 22;
+                    //lanes_23 23;
                     lanes_24 24;
-                    lanes_25 25;
-                    lanes_26 26;
-                    lanes_27 27;
-                    lanes_28 28;
-                    lanes_29 29;
-                    lanes_30 30;
-                    lanes_31 31;
-                    lanes_33 33;
-                    lanes_34 34;
-                    lanes_35 35;
-                    lanes_36 36;
-                    lanes_37 37;
-                    lanes_38 38;
-                    lanes_39 39;
-                    lanes_40 40;
-                    lanes_41 41;
-                    lanes_42 42;
-                    lanes_43 43;
-                    lanes_44 44;
-                    lanes_45 45;
-                    lanes_46 46;
+                    //lanes_25 25;
+                    //lanes_26 26;
+                    //lanes_27 27;
+                    //lanes_28 28;
+                    //lanes_29 29;
+                    //lanes_30 30;
+                    //lanes_31 31;
+                    lanes_32 32;
+                    //lanes_33 33;
+                    //lanes_34 34;
+                    //lanes_35 35;
+                    //lanes_36 36;
+                    //lanes_37 37;
+                    //lanes_38 38;
+                    //lanes_39 39;
+                    //lanes_40 40;
+                    //lanes_41 41;
+                    //lanes_42 42;
+                    //lanes_43 43;
+                    //lanes_44 44;
+                    //lanes_45 45;
+                    //lanes_46 46;
                     lanes_47 47;
-                    lanes_48 48;
-                    lanes_49 49;
-                    lanes_50 50;
-                    lanes_51 51;
-                    lanes_52 52;
-                    lanes_53 53;
-                    lanes_54 54;
-                    lanes_55 55;
+                    //lanes_48 48;
+                    //lanes_49 49;
+                    //lanes_50 50;
+                    //lanes_51 51;
+                    //lanes_52 52;
+                    //lanes_53 53;
+                    //lanes_54 54;
+                    //lanes_55 55;
                     lanes_56 56;
                     lanes_57 57;
-                    lanes_58 58;
-                    lanes_59 59;
-                    lanes_60 60;
-                    lanes_61 61;
-                    lanes_62 62;
+                    //lanes_58 58;
+                    //lanes_59 59;
+                    //lanes_60 60;
+                    //lanes_61 61;
+                    //lanes_62 62;
                     lanes_63 63;
+                    lanes_64 64;
                 );
             }
         )*
@@ -639,36 +632,24 @@ macro_rules! test_lanes_panic {
                     core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount,
                 $body
 
+                // test some odd and even non-power-of-2 lengths on miri
                 $crate::test_lanes_helper!(
                     #[should_panic];
                     lanes_1 1;
                     lanes_2 2;
-                    lanes_4 4;
-                );
-
-                #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
-                $crate::test_lanes_helper!(
-                    #[should_panic];
-                    lanes_8 8;
-                    lanes_16 16;
-                    lanes_32 32;
-                    lanes_64 64;
-                );
-
-                #[cfg(feature = "all_lane_counts")]
-                $crate::test_lanes_helper!(
-                    // test some odd and even non-power-of-2 lengths on miri
-                    #[should_panic];
                     lanes_3 3;
-                    lanes_5 5;
+
                     lanes_6 6;
                 );
 
-                #[cfg(feature = "all_lane_counts")]
                 #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
                 $crate::test_lanes_helper!(
                     #[should_panic];
+                    lanes_4 4;
+                    lanes_5 5;
+
                     lanes_7 7;
+                    lanes_8 8;
                     lanes_9 9;
                     lanes_10 10;
                     lanes_11 11;
@@ -676,52 +657,55 @@ macro_rules! test_lanes_panic {
                     lanes_13 13;
                     lanes_14 14;
                     lanes_15 15;
+                    lanes_16 16;
                     lanes_17 17;
-                    lanes_18 18;
-                    lanes_19 19;
-                    lanes_20 20;
-                    lanes_21 21;
-                    lanes_22 22;
-                    lanes_23 23;
+                    //lanes_18 18;
+                    //lanes_19 19;
+                    //lanes_20 20;
+                    //lanes_21 21;
+                    //lanes_22 22;
+                    //lanes_23 23;
                     lanes_24 24;
-                    lanes_25 25;
-                    lanes_26 26;
-                    lanes_27 27;
-                    lanes_28 28;
-                    lanes_29 29;
-                    lanes_30 30;
-                    lanes_31 31;
-                    lanes_33 33;
-                    lanes_34 34;
-                    lanes_35 35;
-                    lanes_36 36;
-                    lanes_37 37;
-                    lanes_38 38;
-                    lanes_39 39;
-                    lanes_40 40;
-                    lanes_41 41;
-                    lanes_42 42;
-                    lanes_43 43;
-                    lanes_44 44;
-                    lanes_45 45;
-                    lanes_46 46;
+                    //lanes_25 25;
+                    //lanes_26 26;
+                    //lanes_27 27;
+                    //lanes_28 28;
+                    //lanes_29 29;
+                    //lanes_30 30;
+                    //lanes_31 31;
+                    lanes_32 32;
+                    //lanes_33 33;
+                    //lanes_34 34;
+                    //lanes_35 35;
+                    //lanes_36 36;
+                    //lanes_37 37;
+                    //lanes_38 38;
+                    //lanes_39 39;
+                    //lanes_40 40;
+                    //lanes_41 41;
+                    //lanes_42 42;
+                    //lanes_43 43;
+                    //lanes_44 44;
+                    //lanes_45 45;
+                    //lanes_46 46;
                     lanes_47 47;
-                    lanes_48 48;
-                    lanes_49 49;
-                    lanes_50 50;
-                    lanes_51 51;
-                    lanes_52 52;
-                    lanes_53 53;
-                    lanes_54 54;
-                    lanes_55 55;
+                    //lanes_48 48;
+                    //lanes_49 49;
+                    //lanes_50 50;
+                    //lanes_51 51;
+                    //lanes_52 52;
+                    //lanes_53 53;
+                    //lanes_54 54;
+                    //lanes_55 55;
                     lanes_56 56;
                     lanes_57 57;
-                    lanes_58 58;
-                    lanes_59 59;
-                    lanes_60 60;
-                    lanes_61 61;
-                    lanes_62 62;
+                    //lanes_58 58;
+                    //lanes_59 59;
+                    //lanes_60 60;
+                    //lanes_61 61;
+                    //lanes_62 62;
                     lanes_63 63;
+                    lanes_64 64;
                 );
             }
         )*
diff --git a/library/portable-simd/rust-toolchain.toml b/library/portable-simd/rust-toolchain.toml
new file mode 100644
index 00000000000..d17c6d2e889
--- /dev/null
+++ b/library/portable-simd/rust-toolchain.toml
@@ -0,0 +1,3 @@
+[toolchain]
+channel = "nightly-2025-01-16"
+components = ["rustfmt", "clippy", "miri", "rust-src"]
diff --git a/library/portable-simd/subtree-sync.sh b/library/portable-simd/subtree-sync.sh
new file mode 100755
index 00000000000..18360077623
--- /dev/null
+++ b/library/portable-simd/subtree-sync.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+set -eou pipefail
+
+git fetch origin
+pushd $2
+git fetch origin
+popd
+
+if [ "$(git rev-parse --show-prefix)" != "" ]; then
+    echo "Run this script from the git root" >&2
+    exit 1
+fi
+
+if [ "$(git rev-parse HEAD)" != "$(git rev-parse origin/master)" ]; then
+    echo "$(pwd) is not at origin/master" >&2
+    exit 1
+fi
+
+if [ ! -f library/portable-simd/git-subtree.sh ]; then
+    curl -sS https://raw.githubusercontent.com/bjorn3/git/tqc-subtree-portable/contrib/subtree/git-subtree.sh -o library/portable-simd/git-subtree.sh
+    chmod +x library/portable-simd/git-subtree.sh
+fi
+
+today=$(date +%Y-%m-%d)
+
+case $1 in
+    "push")
+        upstream=rust-upstream-$today
+        merge=sync-from-rust-$today
+
+        pushd $2
+        git checkout master
+        git pull
+        popd
+
+        library/portable-simd/git-subtree.sh push -P library/portable-simd $2 $upstream
+
+        pushd $2
+        git checkout -B $merge origin/master
+        git merge $upstream
+        popd
+        echo "Branch \`$merge\` created in \`$2\`. You may need to resolve merge conflicts."
+        ;;
+    "pull")
+        branch=sync-from-portable-simd-$today
+
+        git checkout -B $branch
+        echo "Creating branch \`$branch\`... You may need to resolve merge conflicts."
+        library/portable-simd/git-subtree.sh pull -P library/portable-simd $2 origin/master
+        ;;
+esac