about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--compiler/rustc_codegen_ssa/src/mir/intrinsic.rs24
-rw-r--r--compiler/rustc_codegen_ssa/src/traits/builder.rs54
-rw-r--r--compiler/rustc_codegen_ssa/src/traits/type_.rs14
-rw-r--r--compiler/rustc_const_eval/src/interpret/intrinsics.rs25
-rw-r--r--compiler/rustc_hir_analysis/src/check/intrinsic.rs2
-rw-r--r--compiler/rustc_span/src/symbol.rs1
-rw-r--r--library/core/src/intrinsics.rs22
-rw-r--r--library/core/src/lib.rs1
-rw-r--r--library/core/src/mem/mod.rs60
-rw-r--r--library/core/src/ptr/mod.rs21
-rw-r--r--src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-array.rs19
-rw-r--r--src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-array.stderr20
-rw-r--r--src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-scalar.rs19
-rw-r--r--src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-scalar.stderr20
-rw-r--r--tests/assembly/x86_64-typed-swap.rs53
-rw-r--r--tests/codegen/intrinsics/typed_swap.rs78
-rw-r--r--tests/codegen/swap-small-types.rs5
-rw-r--r--tests/ui/consts/missing_span_in_backtrace.stderr2
18 files changed, 370 insertions, 70 deletions
diff --git a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
index 5532ff6e6a5..3e6cf0ece29 100644
--- a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
@@ -9,6 +9,7 @@ use crate::traits::*;
 use crate::MemFlags;
 
 use rustc_middle::ty::{self, Ty, TyCtxt};
+use rustc_session::config::OptLevel;
 use rustc_span::{sym, Span};
 use rustc_target::abi::{
     call::{FnAbi, PassMode},
@@ -75,6 +76,29 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         let name = bx.tcx().item_name(def_id);
         let name_str = name.as_str();
 
+        // If we're swapping something that's *not* an `OperandValue::Ref`,
+        // then we can do it directly and avoid the alloca.
+        // Otherwise, we'll let the fallback MIR body take care of it.
+        if let sym::typed_swap = name {
+            let pointee_ty = fn_args.type_at(0);
+            let pointee_layout = bx.layout_of(pointee_ty);
+            if !bx.is_backend_ref(pointee_layout)
+                // But if we're not going to optimize, trying to use the fallback
+                // body just makes things worse, so don't bother.
+                || bx.sess().opts.optimize == OptLevel::No
+                // NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
+                // reinterpretation of values as (chunkable) byte arrays, and the loop in the
+                // block optimization in `ptr::swap_nonoverlapping` is hard to rewrite back
+                // into the (unoptimized) direct swapping implementation, so we disable it.
+                || bx.sess().target.arch == "spirv"
+            {
+                let x_place = PlaceRef::new_sized(args[0].immediate(), pointee_layout);
+                let y_place = PlaceRef::new_sized(args[1].immediate(), pointee_layout);
+                bx.typed_place_swap(x_place, y_place);
+                return Ok(());
+            }
+        }
+
         let llret_ty = bx.backend_type(bx.layout_of(ret_ty));
         let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
 
diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs
index 36f37e3791b..7bc9dee3a89 100644
--- a/compiler/rustc_codegen_ssa/src/traits/builder.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs
@@ -1,22 +1,24 @@
 use super::abi::AbiBuilderMethods;
 use super::asm::AsmBuilderMethods;
+use super::consts::ConstMethods;
 use super::coverageinfo::CoverageInfoBuilderMethods;
 use super::debuginfo::DebugInfoBuilderMethods;
 use super::intrinsic::IntrinsicCallMethods;
 use super::misc::MiscMethods;
-use super::type_::{ArgAbiMethods, BaseTypeMethods};
+use super::type_::{ArgAbiMethods, BaseTypeMethods, LayoutTypeMethods};
 use super::{HasCodegen, StaticBuilderMethods};
 
 use crate::common::{
     AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, TypeKind,
 };
-use crate::mir::operand::OperandRef;
+use crate::mir::operand::{OperandRef, OperandValue};
 use crate::mir::place::PlaceRef;
 use crate::MemFlags;
 
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::layout::{HasParamEnv, TyAndLayout};
 use rustc_middle::ty::Ty;
+use rustc_session::config::OptLevel;
 use rustc_span::Span;
 use rustc_target::abi::call::FnAbi;
 use rustc_target::abi::{Abi, Align, Scalar, Size, WrappingRange};
@@ -267,6 +269,54 @@ pub trait BuilderMethods<'a, 'tcx>:
         flags: MemFlags,
     );
 
+    /// *Typed* copy of the value in `src` into the non-overlapping place `dst`.
+    ///
+    /// The default implementation emits a `memcpy` (or, when not optimizing, a
+    /// load-store pair for backend immediates) and nothing at all for ZSTs.
+    /// Specific backends can override it to do something smarter if possible,
+    /// for example typed load-stores with alias metadata.
+    fn typed_place_copy(
+        &mut self,
+        dst: PlaceRef<'tcx, Self::Value>,
+        src: PlaceRef<'tcx, Self::Value>,
+    ) {
+        debug_assert!(src.llextra.is_none());
+        debug_assert!(dst.llextra.is_none());
+        debug_assert_eq!(dst.layout.size, src.layout.size);
+        if self.sess().opts.optimize == OptLevel::No && self.is_backend_immediate(dst.layout) {
+            // If we're not optimizing, the aliasing information from `memcpy`
+            // isn't useful, so just load-store the value for smaller code.
+            let temp = self.load_operand(src);
+            temp.val.store(self, dst);
+        } else if !dst.layout.is_zst() {
+            let bytes = self.const_usize(dst.layout.size.bytes());
+            self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, MemFlags::empty());
+        }
+    }
+
+    /// *Typed* swap of the values in the non-overlapping places `left` and `right`.
+    ///
+    /// Avoids `alloca`s for Immediates and ScalarPairs.
+    ///
+    /// FIXME: Maybe do something smarter for `Ref` types too?
+    /// For now, the `typed_swap` intrinsic only calls this for those cases
+    /// when not optimizing or on SPIR-V, preferring the fallback body otherwise.
+    fn typed_place_swap(
+        &mut self,
+        left: PlaceRef<'tcx, Self::Value>,
+        right: PlaceRef<'tcx, Self::Value>,
+    ) {
+        let mut temp = self.load_operand(left);
+        if let OperandValue::Ref(..) = temp.val {
+            // A `Ref` operand isn't a stand-alone SSA value, so snapshot `left` in a temporary
+            let alloca = PlaceRef::alloca(self, left.layout);
+            self.typed_place_copy(alloca, left);
+            temp = self.load_operand(alloca);
+        }
+        self.typed_place_copy(left, right);
+        temp.val.store(self, right);
+    }
+
     fn select(
         &mut self,
         cond: Self::Value,
diff --git a/compiler/rustc_codegen_ssa/src/traits/type_.rs b/compiler/rustc_codegen_ssa/src/traits/type_.rs
index 72cce43a3fa..555833759eb 100644
--- a/compiler/rustc_codegen_ssa/src/traits/type_.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/type_.rs
@@ -120,6 +120,20 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> {
         immediate: bool,
     ) -> Self::Type;
 
+    /// Whether loading a value of this layout produces an [`OperandValue::Ref`].
+    ///
+    /// That is, the layout is not a ZST, not `is_backend_immediate`, and
+    /// not `is_backend_scalar_pair`. For such a type, a
+    /// [`load_operand`] doesn't actually `load` anything.
+    ///
+    /// [`OperandValue::Ref`]: crate::mir::operand::OperandValue::Ref
+    /// [`load_operand`]: super::BuilderMethods::load_operand
+    fn is_backend_ref(&self, layout: TyAndLayout<'tcx>) -> bool {
+        !(layout.is_zst()
+            || self.is_backend_immediate(layout)
+            || self.is_backend_scalar_pair(layout))
+    }
+
     /// A type that can be used in a [`super::BuilderMethods::load`] +
     /// [`super::BuilderMethods::store`] pair to implement a *typed* copy,
     /// such as a MIR `*_0 = *_1`.
diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs
index a84ef4ce08e..a8478f721c7 100644
--- a/compiler/rustc_const_eval/src/interpret/intrinsics.rs
+++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs
@@ -21,8 +21,8 @@ use rustc_span::symbol::{sym, Symbol};
 use rustc_target::abi::Size;
 
 use super::{
-    util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx, MPlaceTy, Machine, OpTy,
-    Pointer,
+    memory::MemoryKind, util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx,
+    MPlaceTy, Machine, OpTy, Pointer,
 };
 
 use crate::fluent_generated as fluent;
@@ -414,6 +414,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                 let result = self.raw_eq_intrinsic(&args[0], &args[1])?;
                 self.write_scalar(result, dest)?;
             }
+            sym::typed_swap => {
+                self.typed_swap_intrinsic(&args[0], &args[1])?;
+            }
 
             sym::vtable_size => {
                 let ptr = self.read_pointer(&args[0])?;
@@ -607,6 +610,24 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         self.mem_copy(src, dst, size, nonoverlapping)
     }
 
+    /// Does a *typed* swap of `*left` and `*right` through a temporary stack allocation.
+    fn typed_swap_intrinsic(
+        &mut self,
+        left: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
+        right: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
+    ) -> InterpResult<'tcx> {
+        let left = self.deref_pointer(left)?;
+        let right = self.deref_pointer(right)?;
+        debug_assert_eq!(left.layout, right.layout);
+        let kind = MemoryKind::Stack;
+        let temp = self.allocate(left.layout, kind)?;
+        self.copy_op(&left, &temp)?;
+        self.copy_op(&right, &left)?;
+        self.copy_op(&temp, &right)?;
+        self.deallocate_ptr(temp.ptr(), None, kind)?;
+        Ok(())
+    }
+
     pub(crate) fn write_bytes_intrinsic(
         &mut self,
         dst: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
index 0b526a8c977..1d5d4a3205c 100644
--- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs
+++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -484,6 +484,8 @@ pub fn check_intrinsic_type(
                 (1, 0, vec![Ty::new_mut_ptr(tcx, param(0)), param(0)], Ty::new_unit(tcx))
             }
 
+            sym::typed_swap => (1, 1, vec![Ty::new_mut_ptr(tcx, param(0)); 2], Ty::new_unit(tcx)),
+
             sym::discriminant_value => {
                 let assoc_items = tcx.associated_item_def_ids(
                     tcx.require_lang_item(hir::LangItem::DiscriminantKind, None),
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index b6c07e8737f..cae860cf2f7 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -1836,6 +1836,7 @@ symbols! {
         type_macros,
         type_name,
         type_privacy_lints,
+        typed_swap,
         u128,
         u128_legacy_const_max,
         u128_legacy_const_min,
diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs
index 613d0ab212a..a0d5e220b86 100644
--- a/library/core/src/intrinsics.rs
+++ b/library/core/src/intrinsics.rs
@@ -66,6 +66,7 @@
 use crate::marker::DiscriminantKind;
 use crate::marker::Tuple;
 use crate::mem::align_of;
+use crate::ptr;
 
 pub mod mir;
 pub mod simd;
@@ -2638,6 +2639,27 @@ pub const fn is_val_statically_known<T: Copy>(_arg: T) -> bool {
     false
 }
 
+/// Non-overlapping *typed* swap of a single value.
+///
+/// The codegen backends will replace this with a better implementation when
+/// `T` is a simple type that can be loaded and stored as an immediate.
+///
+/// The stabilized form of this intrinsic is [`crate::mem::swap`].
+///
+/// # Safety
+///
+/// Both `x` and `y` must be readable and writable as `T`, and must not overlap.
+#[rustc_nounwind]
+#[inline]
+#[cfg_attr(not(bootstrap), rustc_intrinsic)]
+// This has fallback `const fn` MIR, so shouldn't need stability, see #122652
+#[rustc_const_unstable(feature = "const_typed_swap", issue = "none")]
+pub const unsafe fn typed_swap<T>(x: *mut T, y: *mut T) {
+    // SAFETY: The caller guarantees `x` and `y` each point to one valid,
+    // non-overlapping `T`, so swapping them with `count: 1` is fine.
+    unsafe { ptr::swap_nonoverlapping(x, y, 1) };
+}
+
 /// Returns whether we should check for library UB. This evaluate to the value of `cfg!(debug_assertions)`
 /// during monomorphization.
 ///
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index 2718dd11473..ded8f4ba841 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -170,6 +170,7 @@
 #![feature(const_try)]
 #![feature(const_type_id)]
 #![feature(const_type_name)]
+#![feature(const_typed_swap)]
 #![feature(const_unicode_case_lookup)]
 #![feature(const_unsafecell_get_mut)]
 #![feature(const_waker)]
diff --git a/library/core/src/mem/mod.rs b/library/core/src/mem/mod.rs
index d1dc6720271..75d42edbaa0 100644
--- a/library/core/src/mem/mod.rs
+++ b/library/core/src/mem/mod.rs
@@ -726,63 +726,9 @@ pub unsafe fn uninitialized<T>() -> T {
 #[rustc_const_unstable(feature = "const_swap", issue = "83163")]
 #[rustc_diagnostic_item = "mem_swap"]
 pub const fn swap<T>(x: &mut T, y: &mut T) {
-    // NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
-    // reinterpretation of values as (chunkable) byte arrays, and the loop in the
-    // block optimization in `swap_slice` is hard to rewrite back
-    // into the (unoptimized) direct swapping implementation, so we disable it.
-    #[cfg(not(any(target_arch = "spirv")))]
-    {
-        // For types that are larger multiples of their alignment, the simple way
-        // tends to copy the whole thing to stack rather than doing it one part
-        // at a time, so instead treat them as one-element slices and piggy-back
-        // the slice optimizations that will split up the swaps.
-        if const { size_of::<T>() / align_of::<T>() > 2 } {
-            // SAFETY: exclusive references always point to one non-overlapping
-            // element and are non-null and properly aligned.
-            return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
-        }
-    }
-
-    // If a scalar consists of just a small number of alignment units, let
-    // the codegen just swap those pieces directly, as it's likely just a
-    // few instructions and anything else is probably overcomplicated.
-    //
-    // Most importantly, this covers primitives and simd types that tend to
-    // have size=align where doing anything else can be a pessimization.
-    // (This will also be used for ZSTs, though any solution works for them.)
-    swap_simple(x, y);
-}
-
-/// Same as [`swap`] semantically, but always uses the simple implementation.
-///
-/// Used elsewhere in `mem` and `ptr` at the bottom layer of calls.
-#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
-#[inline]
-pub(crate) const fn swap_simple<T>(x: &mut T, y: &mut T) {
-    // We arrange for this to typically be called with small types,
-    // so this reads-and-writes approach is actually better than using
-    // copy_nonoverlapping as it easily puts things in LLVM registers
-    // directly and doesn't end up inlining allocas.
-    // And LLVM actually optimizes it to 3×memcpy if called with
-    // a type larger than it's willing to keep in a register.
-    // Having typed reads and writes in MIR here is also good as
-    // it lets Miri and CTFE understand them better, including things
-    // like enforcing type validity for them.
-    // Importantly, read+copy_nonoverlapping+write introduces confusing
-    // asymmetry to the behaviour where one value went through read+write
-    // whereas the other was copied over by the intrinsic (see #94371).
-    // Furthermore, using only read+write here benefits limited backends
-    // such as SPIR-V that work on an underlying *typed* view of memory,
-    // and thus have trouble with Rust's untyped memory operations.
-
-    // SAFETY: exclusive references are always valid to read/write,
-    // including being aligned, and nothing here panics so it's drop-safe.
-    unsafe {
-        let a = ptr::read(x);
-        let b = ptr::read(y);
-        ptr::write(x, b);
-        ptr::write(y, a);
-    }
+    // SAFETY: `&mut` guarantees these are typed readable and writable
+    // as well as non-overlapping.
+    unsafe { intrinsics::typed_swap(x, y) }
 }
 
 /// Replaces `dest` with the default value of `T`, returning the previous `dest` value.
diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs
index 1f0204daf72..0662dfe9a15 100644
--- a/library/core/src/ptr/mod.rs
+++ b/library/core/src/ptr/mod.rs
@@ -1062,11 +1062,26 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
     let mut i = 0;
     while i < count {
         // SAFETY: By precondition, `i` is in-bounds because it's below `n`
-        let x = unsafe { &mut *x.add(i) };
+        let x = unsafe { x.add(i) };
         // SAFETY: By precondition, `i` is in-bounds because it's below `n`
         // and it's distinct from `x` since the ranges are non-overlapping
-        let y = unsafe { &mut *y.add(i) };
-        mem::swap_simple::<MaybeUninit<T>>(x, y);
+        let y = unsafe { y.add(i) };
+
+        // If we end up here, it's because we're using a simple type -- like
+        // a small power-of-two-sized thing -- or a special type with particularly
+        // large alignment, particularly SIMD types.
+        // Thus we're fine just reading-and-writing it, as either it's small
+        // and that works well anyway or it's special and the type's author
+        // presumably wanted things to be done in the larger chunk.
+
+        // SAFETY: we're only ever given pointers that are valid to read/write,
+        // including being aligned, and nothing here panics so it's drop-safe.
+        unsafe {
+            let a: MaybeUninit<T> = read(x);
+            let b: MaybeUninit<T> = read(y);
+            write(x, b);
+            write(y, a);
+        }
 
         i += 1;
     }
diff --git a/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-array.rs b/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-array.rs
new file mode 100644
index 00000000000..89fdd2a01eb
--- /dev/null
+++ b/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-array.rs
@@ -0,0 +1,19 @@
+#![feature(core_intrinsics)]
+#![feature(rustc_attrs)]
+
+use std::intrinsics::typed_swap;
+use std::ptr::addr_of_mut;
+
+fn invalid_array() {
+    let mut a = [1_u8; 100];
+    let mut b = [2_u8; 100];
+    unsafe {
+        let a = addr_of_mut!(a).cast::<[bool; 100]>();
+        let b = addr_of_mut!(b).cast::<[bool; 100]>();
+        typed_swap(a, b); //~ERROR: constructing invalid value
+    }
+}
+
+fn main() {
+    invalid_array();
+}
diff --git a/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-array.stderr b/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-array.stderr
new file mode 100644
index 00000000000..15f01c1c095
--- /dev/null
+++ b/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-array.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: constructing invalid value at [0]: encountered 0x02, but expected a boolean
+  --> $DIR/typed-swap-invalid-array.rs:LL:CC
+   |
+LL |         typed_swap(a, b);
+   |         ^^^^^^^^^^^^^^^^ constructing invalid value at [0]: encountered 0x02, but expected a boolean
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE:
+   = note: inside `invalid_array` at $DIR/typed-swap-invalid-array.rs:LL:CC
+note: inside `main`
+  --> $DIR/typed-swap-invalid-array.rs:LL:CC
+   |
+LL |     invalid_array();
+   |     ^^^^^^^^^^^^^^^
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-scalar.rs b/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-scalar.rs
new file mode 100644
index 00000000000..9d014a523f8
--- /dev/null
+++ b/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-scalar.rs
@@ -0,0 +1,19 @@
+#![feature(core_intrinsics)]
+#![feature(rustc_attrs)]
+
+use std::intrinsics::typed_swap;
+use std::ptr::addr_of_mut;
+
+fn invalid_scalar() {
+    let mut a = 1_u8;
+    let mut b = 2_u8;
+    unsafe {
+        let a = addr_of_mut!(a).cast::<bool>();
+        let b = addr_of_mut!(b).cast::<bool>();
+        typed_swap(a, b); //~ERROR: constructing invalid value
+    }
+}
+
+fn main() {
+    invalid_scalar();
+}
diff --git a/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-scalar.stderr b/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-scalar.stderr
new file mode 100644
index 00000000000..262ca202f9f
--- /dev/null
+++ b/src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-scalar.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: constructing invalid value: encountered 0x02, but expected a boolean
+  --> $DIR/typed-swap-invalid-scalar.rs:LL:CC
+   |
+LL |         typed_swap(a, b);
+   |         ^^^^^^^^^^^^^^^^ constructing invalid value: encountered 0x02, but expected a boolean
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE:
+   = note: inside `invalid_scalar` at $DIR/typed-swap-invalid-scalar.rs:LL:CC
+note: inside `main`
+  --> $DIR/typed-swap-invalid-scalar.rs:LL:CC
+   |
+LL |     invalid_scalar();
+   |     ^^^^^^^^^^^^^^^^
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
diff --git a/tests/assembly/x86_64-typed-swap.rs b/tests/assembly/x86_64-typed-swap.rs
new file mode 100644
index 00000000000..95e87519e6c
--- /dev/null
+++ b/tests/assembly/x86_64-typed-swap.rs
@@ -0,0 +1,53 @@
+//@ revisions: WIN LIN
+//@ [WIN] only-windows
+//@ [LIN] only-linux
+//@ only-x86_64
+//@ assembly-output: emit-asm
+//@ compile-flags: --crate-type=lib -O
+
+use std::arch::x86_64::__m128;
+use std::mem::swap;
+
+// CHECK-LABEL: swap_i32:
+#[no_mangle]
+pub fn swap_i32(x: &mut i32, y: &mut i32) {
+    // CHECK: movl (%[[ARG1:.+]]), %[[T1:.+]]
+    // CHECK: movl (%[[ARG2:.+]]), %[[T2:.+]]
+    // CHECK: movl %[[T2]], (%[[ARG1]])
+    // CHECK: movl %[[T1]], (%[[ARG2]])
+    // CHECK: retq
+    swap(x, y)
+}
+
+// CHECK-LABEL: swap_pair:
+#[no_mangle]
+pub fn swap_pair(x: &mut (i32, u32), y: &mut (i32, u32)) {
+    // CHECK: movq (%[[ARG1]]), %[[T1:.+]]
+    // CHECK: movq (%[[ARG2]]), %[[T2:.+]]
+    // CHECK: movq %[[T2]], (%[[ARG1]])
+    // CHECK: movq %[[T1]], (%[[ARG2]])
+    // CHECK: retq
+    swap(x, y)
+}
+
+// CHECK-LABEL: swap_str:
+#[no_mangle]
+pub fn swap_str<'a>(x: &mut &'a str, y: &mut &'a str) {
+    // CHECK: movups (%[[ARG1]]), %[[T1:xmm.]]
+    // CHECK: movups (%[[ARG2]]), %[[T2:xmm.]]
+    // CHECK: movups %[[T2]], (%[[ARG1]])
+    // CHECK: movups %[[T1]], (%[[ARG2]])
+    // CHECK: retq
+    swap(x, y)
+}
+
+// CHECK-LABEL: swap_simd:
+#[no_mangle]
+pub fn swap_simd(x: &mut __m128, y: &mut __m128) {
+    // CHECK: movaps (%[[ARG1]]), %[[T1:xmm.]]
+    // CHECK: movaps (%[[ARG2]]), %[[T2:xmm.]]
+    // CHECK: movaps %[[T2]], (%[[ARG1]])
+    // CHECK: movaps %[[T1]], (%[[ARG2]])
+    // CHECK: retq
+    swap(x, y)
+}
diff --git a/tests/codegen/intrinsics/typed_swap.rs b/tests/codegen/intrinsics/typed_swap.rs
new file mode 100644
index 00000000000..b55fb8ee36f
--- /dev/null
+++ b/tests/codegen/intrinsics/typed_swap.rs
@@ -0,0 +1,78 @@
+//@ revisions: OPT0 OPT3
+//@ [OPT0] compile-flags: -Copt-level=0
+//@ [OPT3] compile-flags: -Copt-level=3
+//@ compile-flags: -C no-prepopulate-passes
+//@ only-64bit (so I don't need to worry about usize)
+// ignore-tidy-linelength (the memcpy calls get long)
+
+#![crate_type = "lib"]
+#![feature(core_intrinsics)]
+
+use std::intrinsics::typed_swap;
+
+// CHECK-LABEL: @swap_unit(
+#[no_mangle]
+pub unsafe fn swap_unit(x: &mut (), y: &mut ()) {
+    // CHECK: start
+    // CHECK-NEXT: ret void
+    typed_swap(x, y)
+}
+
+// CHECK-LABEL: @swap_i32(
+#[no_mangle]
+pub unsafe fn swap_i32(x: &mut i32, y: &mut i32) {
+    // CHECK-NOT: alloca
+
+    // CHECK: %[[TEMP:.+]] = load i32, ptr %x, align 4
+    // CHECK-SAME: !noundef
+    // OPT0: %[[TEMP2:.+]] = load i32, ptr %y, align 4
+    // OPT0-SAME: !noundef
+    // OPT0: store i32 %[[TEMP2]], ptr %x, align 4
+    // OPT0-NOT: memcpy
+    // OPT3-NOT: load
+    // OPT3: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %x, ptr align 4 %y, i64 4, i1 false)
+    // CHECK: store i32 %[[TEMP]], ptr %y, align 4
+    // CHECK: ret void
+    typed_swap(x, y)
+}
+
+// CHECK-LABEL: @swap_pair(
+#[no_mangle]
+pub unsafe fn swap_pair(x: &mut (i32, u32), y: &mut (i32, u32)) {
+    // CHECK-NOT: alloca
+
+    // CHECK: load i32
+    // CHECK-SAME: !noundef
+    // CHECK: load i32
+    // CHECK-SAME: !noundef
+    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %x, ptr align 4 %y, i64 8, i1 false)
+    // CHECK: store i32
+    // CHECK: store i32
+    typed_swap(x, y)
+}
+
+// CHECK-LABEL: @swap_str(
+#[no_mangle]
+pub unsafe fn swap_str<'a>(x: &mut &'a str, y: &mut &'a str) {
+    // CHECK-NOT: alloca
+
+    // CHECK: load ptr
+    // CHECK-SAME: !nonnull
+    // CHECK-SAME: !noundef
+    // CHECK: load i64
+    // CHECK-SAME: !noundef
+    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %x, ptr align 8 %y, i64 16, i1 false)
+    // CHECK: store ptr
+    // CHECK: store i64
+    typed_swap(x, y)
+}
+
+// OPT0-LABEL: @swap_string(
+#[no_mangle]
+pub unsafe fn swap_string(x: &mut String, y: &mut String) {
+    // OPT0: %[[TEMP:.+]] = alloca {{.+}}, align 8
+    // OPT0: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[TEMP]], ptr align 8 %x, i64 24, i1 false)
+    // OPT0: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %x, ptr align 8 %y, i64 24, i1 false)
+    // OPT0: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %y, ptr align 8 %[[TEMP]], i64 24, i1 false)
+    typed_swap(x, y)
+}
diff --git a/tests/codegen/swap-small-types.rs b/tests/codegen/swap-small-types.rs
index 5fdf4a5804a..4dcfed2a53a 100644
--- a/tests/codegen/swap-small-types.rs
+++ b/tests/codegen/swap-small-types.rs
@@ -70,10 +70,7 @@ pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
     // CHECK-NOT: alloca
     // CHECK: load ptr
     // CHECK: load i64
-    // CHECK: load ptr
-    // CHECK: load i64
-    // CHECK: store ptr
-    // CHECK: store i64
+    // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 16, i1 false)
     // CHECK: store ptr
     // CHECK: store i64
     swap(x, y)
diff --git a/tests/ui/consts/missing_span_in_backtrace.stderr b/tests/ui/consts/missing_span_in_backtrace.stderr
index 3e3e8e976be..9e0506e7e38 100644
--- a/tests/ui/consts/missing_span_in_backtrace.stderr
+++ b/tests/ui/consts/missing_span_in_backtrace.stderr
@@ -5,8 +5,6 @@ error[E0080]: evaluation of constant value failed
    |
 note: inside `std::ptr::read::<MaybeUninit<MaybeUninit<u8>>>`
   --> $SRC_DIR/core/src/ptr/mod.rs:LL:COL
-note: inside `mem::swap_simple::<MaybeUninit<MaybeUninit<u8>>>`
-  --> $SRC_DIR/core/src/mem/mod.rs:LL:COL
 note: inside `std::ptr::swap_nonoverlapping_simple_untyped::<MaybeUninit<u8>>`
   --> $SRC_DIR/core/src/ptr/mod.rs:LL:COL
 note: inside `swap_nonoverlapping::<MaybeUninit<u8>>`