53 files changed, 1242 insertions, 1063 deletions
diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs index 9d8ce2383f2..d29bba2570f 100644 --- a/compiler/rustc_codegen_gcc/src/back/lto.rs +++ b/compiler/rustc_codegen_gcc/src/back/lto.rs @@ -305,12 +305,9 @@ pub(crate) fn run_thin( ) } -pub(crate) fn prepare_thin( - module: ModuleCodegen<GccContext>, - _emit_summary: bool, -) -> (String, ThinBuffer) { +pub(crate) fn prepare_thin(module: ModuleCodegen<GccContext>) -> (String, ThinBuffer) { let name = module.name; - //let buffer = ThinBuffer::new(module.module_llvm.context, true, emit_summary); + //let buffer = ThinBuffer::new(module.module_llvm.context, true); let buffer = ThinBuffer::new(&module.module_llvm.context); (name, buffer) } @@ -650,10 +647,6 @@ impl ThinBufferMethods for ThinBuffer { fn data(&self) -> &[u8] { &[] } - - fn thin_link_data(&self) -> &[u8] { - unimplemented!(); - } } pub struct ThinData; //(Arc<TempDir>); diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs index 2d7df79ba95..f76f933cad4 100644 --- a/compiler/rustc_codegen_gcc/src/lib.rs +++ b/compiler/rustc_codegen_gcc/src/lib.rs @@ -408,11 +408,8 @@ impl WriteBackendMethods for GccCodegenBackend { back::write::codegen(cgcx, module, config) } - fn prepare_thin( - module: ModuleCodegen<Self::Module>, - emit_summary: bool, - ) -> (String, Self::ThinBuffer) { - back::lto::prepare_thin(module, emit_summary) + fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) { + back::lto::prepare_thin(module) } fn serialize_module(_module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) { diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index ad2e722cfef..f571716d9dd 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -185,12 +185,9 @@ pub(crate) fn run_thin( thin_lto(cgcx, dcx, modules, upstream_modules, cached_modules, &symbols_below_threshold) } -pub(crate) fn prepare_thin( - module: ModuleCodegen<ModuleLlvm>, - emit_summary: bool, -) -> (String, ThinBuffer) { +pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) { let name = module.name; - let buffer = ThinBuffer::new(module.module_llvm.llmod(), true, emit_summary); + let buffer = ThinBuffer::new(module.module_llvm.llmod(), true); (name, buffer) } @@ -687,9 +684,9 @@ unsafe impl Send for ThinBuffer {} unsafe impl Sync for ThinBuffer {} impl ThinBuffer { - pub(crate) fn new(m: &llvm::Module, is_thin: bool, emit_summary: bool) -> ThinBuffer { + pub(crate) fn new(m: &llvm::Module, is_thin: bool) -> ThinBuffer { unsafe { - let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin, emit_summary); + let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin); ThinBuffer(buffer) } } @@ -698,21 +695,21 @@ impl ThinBuffer { let mut ptr = NonNull::new(ptr).unwrap(); ThinBuffer(unsafe { ptr.as_mut() }) } -} -impl ThinBufferMethods for ThinBuffer { - fn data(&self) -> &[u8] { + pub(crate) fn thin_link_data(&self) -> &[u8] { unsafe { - let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _; - let len = llvm::LLVMRustThinLTOBufferLen(self.0); + let ptr = llvm::LLVMRustThinLTOBufferThinLinkDataPtr(self.0) as *const _; + let len = llvm::LLVMRustThinLTOBufferThinLinkDataLen(self.0); slice::from_raw_parts(ptr, len) } } +} - fn thin_link_data(&self) -> &[u8] { +impl ThinBufferMethods for ThinBuffer { + fn data(&self) -> &[u8] { unsafe { - let ptr = 
llvm::LLVMRustThinLTOBufferThinLinkDataPtr(self.0) as *const _; - let len = llvm::LLVMRustThinLTOBufferThinLinkDataLen(self.0); + let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _; + let len = llvm::LLVMRustThinLTOBufferLen(self.0); slice::from_raw_parts(ptr, len) } } diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 7ea2ae6673b..423f0da4878 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -837,7 +837,7 @@ pub(crate) fn codegen( "LLVM_module_codegen_make_bitcode", &*module.name, ); - ThinBuffer::new(llmod, config.emit_thin_lto, false) + ThinBuffer::new(llmod, config.emit_thin_lto) }; let data = thin.data(); let _timer = cgcx diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index 628cb34fd9e..6fb23d09843 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -211,11 +211,8 @@ impl WriteBackendMethods for LlvmCodegenBackend { ) -> CompiledModule { back::write::codegen(cgcx, module, config) } - fn prepare_thin( - module: ModuleCodegen<Self::Module>, - emit_summary: bool, - ) -> (String, Self::ThinBuffer) { - back::lto::prepare_thin(module, emit_summary) + fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) { + back::lto::prepare_thin(module) } fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) { (module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod())) diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index b66fc157b3c..0679f55ab7f 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2602,7 +2602,6 @@ unsafe extern "C" { pub(crate) fn LLVMRustThinLTOBufferCreate( M: &Module, is_thin: bool, - emit_summary: bool, ) -> &'static mut ThinLTOBuffer; pub(crate) fn LLVMRustThinLTOBufferFree(M: &'static mut ThinLTOBuffer); pub(crate) fn LLVMRustThinLTOBufferPtr(M: &ThinLTOBuffer) -> *const c_char; diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 95e02a7c6db..cbaf67d7345 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -830,6 +830,8 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>( cgcx: &CodegenContext<B>, mut module: ModuleCodegen<B::Module>, ) -> WorkItemResult<B> { + let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_optimize", &*module.name); + let dcx = cgcx.create_dcx(); let dcx = dcx.handle(); @@ -862,7 +864,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>( WorkItemResult::Finished(module) } ComputedLtoType::Thin => { - let (name, thin_buffer) = B::prepare_thin(module, false); + let (name, thin_buffer) = B::prepare_thin(module); if let Some(path) = bitcode { fs::write(&path, thin_buffer.data()).unwrap_or_else(|e| { panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e); @@ -890,6 +892,10 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>( cgcx: &CodegenContext<B>, module: CachedModuleCodegen, ) -> WorkItemResult<B> { + let _timer = cgcx + .prof + .generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &*module.name); + let incr_comp_session_dir = cgcx.incr_comp_session_dir.as_ref().unwrap(); let mut links_from_incr_cache = Vec::new(); @@ -977,6 +983,8 @@ fn 
execute_fat_lto_work_item<B: ExtraBackendMethods>( mut needs_fat_lto: Vec<FatLtoInput<B>>, import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>, ) -> WorkItemResult<B> { + let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", "everything"); + for (module, wp) in import_only_modules { needs_fat_lto.push(FatLtoInput::Serialized { name: wp.cgu_name, buffer: module }) } @@ -995,6 +1003,8 @@ fn execute_thin_lto_work_item<B: ExtraBackendMethods>( cgcx: &CodegenContext<B>, module: lto::ThinModule<B>, ) -> WorkItemResult<B> { + let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", module.name()); + let module = B::optimize_thin(cgcx, module); let module = B::codegen(cgcx, module, &cgcx.module_config); WorkItemResult::Finished(module) @@ -1714,38 +1724,21 @@ fn spawn_work<'a, B: ExtraBackendMethods>( B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || { let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work { - WorkItem::Optimize(m) => { - let _timer = - cgcx.prof.generic_activity_with_arg("codegen_module_optimize", &*m.name); - execute_optimize_work_item(&cgcx, m) - } - WorkItem::CopyPostLtoArtifacts(m) => { - let _timer = cgcx - .prof - .generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &*m.name); - execute_copy_from_cache_work_item(&cgcx, m) - } + WorkItem::Optimize(m) => execute_optimize_work_item(&cgcx, m), + WorkItem::CopyPostLtoArtifacts(m) => execute_copy_from_cache_work_item(&cgcx, m), WorkItem::FatLto { exported_symbols_for_lto, each_linked_rlib_for_lto, needs_fat_lto, import_only_modules, - } => { - let _timer = - cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", "everything"); - execute_fat_lto_work_item( - &cgcx, - &exported_symbols_for_lto, - &each_linked_rlib_for_lto, - needs_fat_lto, - import_only_modules, - ) - } - WorkItem::ThinLto(m) => { - let _timer = - cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name()); - execute_thin_lto_work_item(&cgcx, m) - } + } => execute_fat_lto_work_item( + &cgcx, + &exported_symbols_for_lto, + &each_linked_rlib_for_lto, + needs_fat_lto, + import_only_modules, + ), + WorkItem::ThinLto(m) => execute_thin_lto_work_item(&cgcx, m), })); let msg = match result { diff --git a/compiler/rustc_codegen_ssa/src/traits/write.rs b/compiler/rustc_codegen_ssa/src/traits/write.rs index cc7c4e46d7b..1ac1d7ef2e2 100644 --- a/compiler/rustc_codegen_ssa/src/traits/write.rs +++ b/compiler/rustc_codegen_ssa/src/traits/write.rs @@ -50,16 +50,12 @@ pub trait WriteBackendMethods: Clone + 'static { module: ModuleCodegen<Self::Module>, config: &ModuleConfig, ) -> CompiledModule; - fn prepare_thin( - module: ModuleCodegen<Self::Module>, - want_summary: bool, - ) -> (String, Self::ThinBuffer); + fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer); fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer); } pub trait ThinBufferMethods: Send + Sync { fn data(&self) -> &[u8]; - fn thin_link_data(&self) -> &[u8]; } pub trait ModuleBufferMethods: Send + Sync { diff --git a/compiler/rustc_index/src/interval.rs b/compiler/rustc_index/src/interval.rs index 0225c5c4f32..dda5253e7c5 100644 --- a/compiler/rustc_index/src/interval.rs +++ b/compiler/rustc_index/src/interval.rs @@ -140,6 +140,30 @@ impl<I: Idx> IntervalSet<I> { result } + /// Specialized version of `insert` when we know that the inserted point is *after* any + /// contained. 
+ pub fn append(&mut self, point: I) { + let point = point.index() as u32; + + if let Some((_, last_end)) = self.map.last_mut() { + assert!(*last_end <= point); + if point == *last_end { + // The point is already in the set. + } else if point == *last_end + 1 { + *last_end = point; + } else { + self.map.push((point, point)); + } + } else { + self.map.push((point, point)); + } + + debug_assert!( + self.check_invariants(), + "wrong intervals after append {point:?} to {self:?}" + ); + } + pub fn contains(&self, needle: I) -> bool { let needle = needle.index() as u32; let Some(last) = self.map.partition_point(|r| r.0 <= needle).checked_sub(1) else { @@ -176,6 +200,32 @@ impl<I: Idx> IntervalSet<I> { }) } + pub fn disjoint(&self, other: &IntervalSet<I>) -> bool + where + I: Step, + { + let helper = move || { + let mut self_iter = self.iter_intervals(); + let mut other_iter = other.iter_intervals(); + + let mut self_current = self_iter.next()?; + let mut other_current = other_iter.next()?; + + loop { + if self_current.end <= other_current.start { + self_current = self_iter.next()?; + continue; + } + if other_current.end <= self_current.start { + other_current = other_iter.next()?; + continue; + } + return Some(false); + } + }; + helper().unwrap_or(true) + } + pub fn is_empty(&self) -> bool { self.map.is_empty() } @@ -325,6 +375,10 @@ impl<R: Idx, C: Step + Idx> SparseIntervalMatrix<R, C> { self.ensure_row(row).insert(point) } + pub fn append(&mut self, row: R, point: C) { + self.ensure_row(row).append(point) + } + pub fn contains(&self, row: R, point: C) -> bool { self.row(row).is_some_and(|r| r.contains(point)) } diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index dd492325814..3bb1533c2fe 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -1568,12 +1568,11 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, return true; } -extern "C" LLVMRustThinLTOBuffer * -LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) { +extern "C" LLVMRustThinLTOBuffer *LLVMRustThinLTOBufferCreate(LLVMModuleRef M, + bool is_thin) { auto Ret = std::make_unique<LLVMRustThinLTOBuffer>(); { auto OS = raw_string_ostream(Ret->data); - auto ThinLinkOS = raw_string_ostream(Ret->thin_link_data); { if (is_thin) { PassBuilder PB; @@ -1587,11 +1586,7 @@ LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) { PB.registerLoopAnalyses(LAM); PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); ModulePassManager MPM; - // We only pass ThinLinkOS to be filled in if we want the summary, - // because otherwise LLVM does extra work and may double-emit some - // errors or warnings. - MPM.addPass( - ThinLTOBitcodeWriterPass(OS, emit_summary ? &ThinLinkOS : nullptr)); + MPM.addPass(ThinLTOBitcodeWriterPass(OS, nullptr)); MPM.run(*unwrap(M), MAM); } else { WriteBitcodeToFile(*unwrap(M), OS); diff --git a/compiler/rustc_middle/src/mir/visit.rs b/compiler/rustc_middle/src/mir/visit.rs index 904d78d69b6..b498b7b8912 100644 --- a/compiler/rustc_middle/src/mir/visit.rs +++ b/compiler/rustc_middle/src/mir/visit.rs @@ -1415,6 +1415,24 @@ impl PlaceContext { ) } + /// Returns `true` if this place context may be used to know the address of the given place. 
+ #[inline] + pub fn may_observe_address(self) -> bool { + matches!( + self, + PlaceContext::NonMutatingUse( + NonMutatingUseContext::SharedBorrow + | NonMutatingUseContext::RawBorrow + | NonMutatingUseContext::FakeBorrow + ) | PlaceContext::MutatingUse( + MutatingUseContext::Drop + | MutatingUseContext::Borrow + | MutatingUseContext::RawBorrow + | MutatingUseContext::AsmOutput + ) + ) + } + /// Returns `true` if this place context represents a storage live or storage dead marker. #[inline] pub fn is_storage_marker(self) -> bool { diff --git a/compiler/rustc_mir_dataflow/src/impls/liveness.rs b/compiler/rustc_mir_dataflow/src/impls/liveness.rs index 6ec1b03a34e..5eba474a60c 100644 --- a/compiler/rustc_mir_dataflow/src/impls/liveness.rs +++ b/compiler/rustc_mir_dataflow/src/impls/liveness.rs @@ -92,7 +92,7 @@ impl<'tcx> Visitor<'tcx> for TransferFunction<'_> { } match DefUse::for_place(*place, context) { - Some(DefUse::Def) => { + DefUse::Def => { if let PlaceContext::MutatingUse( MutatingUseContext::Call | MutatingUseContext::AsmOutput, ) = context @@ -105,8 +105,8 @@ impl<'tcx> Visitor<'tcx> for TransferFunction<'_> { self.0.kill(place.local); } } - Some(DefUse::Use) => self.0.gen_(place.local), - None => {} + DefUse::Use => self.0.gen_(place.local), + DefUse::PartialWrite | DefUse::NonUse => {} } self.visit_projection(place.as_ref(), context, location); @@ -131,23 +131,29 @@ impl<'tcx> Visitor<'tcx> for YieldResumeEffect<'_> { } #[derive(Eq, PartialEq, Clone)] -enum DefUse { +pub enum DefUse { + /// Full write to the local. Def, + /// Read of any part of the local. Use, + /// Partial write to the local. + PartialWrite, + /// Non-use, like debuginfo. + NonUse, } impl DefUse { fn apply(state: &mut DenseBitSet<Local>, place: Place<'_>, context: PlaceContext) { match DefUse::for_place(place, context) { - Some(DefUse::Def) => state.kill(place.local), - Some(DefUse::Use) => state.gen_(place.local), - None => {} + DefUse::Def => state.kill(place.local), + DefUse::Use => state.gen_(place.local), + DefUse::PartialWrite | DefUse::NonUse => {} } } - fn for_place(place: Place<'_>, context: PlaceContext) -> Option<DefUse> { + pub fn for_place(place: Place<'_>, context: PlaceContext) -> DefUse { match context { - PlaceContext::NonUse(_) => None, + PlaceContext::NonUse(_) => DefUse::NonUse, PlaceContext::MutatingUse( MutatingUseContext::Call @@ -156,21 +162,20 @@ impl DefUse { | MutatingUseContext::Store | MutatingUseContext::Deinit, ) => { + // Treat derefs as a use of the base local. `*p = 4` is not a def of `p` but a use. if place.is_indirect() { - // Treat derefs as a use of the base local. `*p = 4` is not a def of `p` but a - // use. - Some(DefUse::Use) + DefUse::Use } else if place.projection.is_empty() { - Some(DefUse::Def) + DefUse::Def } else { - None + DefUse::PartialWrite } } // Setting the discriminant is not a use because it does no reading, but it is also not // a def because it does not overwrite the whole place PlaceContext::MutatingUse(MutatingUseContext::SetDiscriminant) => { - place.is_indirect().then_some(DefUse::Use) + if place.is_indirect() { DefUse::Use } else { DefUse::PartialWrite } } // All other contexts are uses... 
@@ -188,7 +193,7 @@ impl DefUse { | NonMutatingUseContext::PlaceMention | NonMutatingUseContext::FakeBorrow | NonMutatingUseContext::SharedBorrow, - ) => Some(DefUse::Use), + ) => DefUse::Use, PlaceContext::MutatingUse(MutatingUseContext::Projection) | PlaceContext::NonMutatingUse(NonMutatingUseContext::Projection) => { diff --git a/compiler/rustc_mir_dataflow/src/impls/mod.rs b/compiler/rustc_mir_dataflow/src/impls/mod.rs index 3f29b819a6d..6d573e1c00e 100644 --- a/compiler/rustc_mir_dataflow/src/impls/mod.rs +++ b/compiler/rustc_mir_dataflow/src/impls/mod.rs @@ -9,7 +9,8 @@ pub use self::initialized::{ MaybeUninitializedPlaces, MaybeUninitializedPlacesDomain, }; pub use self::liveness::{ - MaybeLiveLocals, MaybeTransitiveLiveLocals, TransferFunction as LivenessTransferFunction, + DefUse, MaybeLiveLocals, MaybeTransitiveLiveLocals, + TransferFunction as LivenessTransferFunction, }; pub use self::storage_liveness::{ MaybeRequiresStorage, MaybeStorageDead, MaybeStorageLive, always_storage_live_locals, diff --git a/compiler/rustc_mir_dataflow/src/points.rs b/compiler/rustc_mir_dataflow/src/points.rs index 70d1a34b5fb..e3d1e04a319 100644 --- a/compiler/rustc_mir_dataflow/src/points.rs +++ b/compiler/rustc_mir_dataflow/src/points.rs @@ -1,9 +1,5 @@ -use rustc_index::bit_set::DenseBitSet; -use rustc_index::interval::SparseIntervalMatrix; use rustc_index::{Idx, IndexVec}; -use rustc_middle::mir::{self, BasicBlock, Body, Location}; - -use crate::framework::{Analysis, Results, ResultsVisitor, visit_results}; +use rustc_middle::mir::{BasicBlock, Body, Location}; /// Maps between a `Location` and a `PointIndex` (and vice versa). pub struct DenseLocationMap { @@ -93,65 +89,3 @@ rustc_index::newtype_index! { #[debug_format = "PointIndex({})"] pub struct PointIndex {} } - -/// Add points depending on the result of the given dataflow analysis. -pub fn save_as_intervals<'tcx, N, A>( - elements: &DenseLocationMap, - body: &mir::Body<'tcx>, - mut analysis: A, - results: Results<A::Domain>, -) -> SparseIntervalMatrix<N, PointIndex> -where - N: Idx, - A: Analysis<'tcx, Domain = DenseBitSet<N>>, -{ - let values = SparseIntervalMatrix::new(elements.num_points()); - let mut visitor = Visitor { elements, values }; - visit_results( - body, - body.basic_blocks.reverse_postorder().iter().copied(), - &mut analysis, - &results, - &mut visitor, - ); - visitor.values -} - -struct Visitor<'a, N: Idx> { - elements: &'a DenseLocationMap, - values: SparseIntervalMatrix<N, PointIndex>, -} - -impl<'tcx, A, N> ResultsVisitor<'tcx, A> for Visitor<'_, N> -where - A: Analysis<'tcx, Domain = DenseBitSet<N>>, - N: Idx, -{ - fn visit_after_primary_statement_effect<'mir>( - &mut self, - _analysis: &mut A, - state: &A::Domain, - _statement: &'mir mir::Statement<'tcx>, - location: Location, - ) { - let point = self.elements.point_from_location(location); - // Use internal iterator manually as it is much more efficient. - state.iter().for_each(|node| { - self.values.insert(node, point); - }); - } - - fn visit_after_primary_terminator_effect<'mir>( - &mut self, - _analysis: &mut A, - state: &A::Domain, - _terminator: &'mir mir::Terminator<'tcx>, - location: Location, - ) { - let point = self.elements.point_from_location(location); - // Use internal iterator manually as it is much more efficient. 
- state.iter().for_each(|node| { - self.values.insert(node, point); - }); - } -} diff --git a/compiler/rustc_mir_dataflow/src/value_analysis.rs b/compiler/rustc_mir_dataflow/src/value_analysis.rs index 005e7973130..9a00831dc01 100644 --- a/compiler/rustc_mir_dataflow/src/value_analysis.rs +++ b/compiler/rustc_mir_dataflow/src/value_analysis.rs @@ -6,7 +6,7 @@ use rustc_data_structures::fx::{FxHashMap, FxIndexSet, StdEntry}; use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_index::IndexVec; use rustc_index::bit_set::DenseBitSet; -use rustc_middle::mir::visit::{MutatingUseContext, PlaceContext, Visitor}; +use rustc_middle::mir::visit::{PlaceContext, Visitor}; use rustc_middle::mir::*; use rustc_middle::ty::{self, Ty, TyCtxt}; use tracing::debug; @@ -917,12 +917,7 @@ pub fn excluded_locals(body: &Body<'_>) -> DenseBitSet<Local> { impl<'tcx> Visitor<'tcx> for Collector { fn visit_place(&mut self, place: &Place<'tcx>, context: PlaceContext, _location: Location) { - if (context.is_borrow() - || context.is_address_of() - || context.is_drop() - || context == PlaceContext::MutatingUse(MutatingUseContext::AsmOutput)) - && !place.is_indirect() - { + if context.may_observe_address() && !place.is_indirect() { // A pointer to a place could be used to access other places with the same local, // hence we have to exclude the local completely. self.result.insert(place.local); diff --git a/compiler/rustc_mir_transform/src/cross_crate_inline.rs b/compiler/rustc_mir_transform/src/cross_crate_inline.rs index b186c2bd775..df98c07f549 100644 --- a/compiler/rustc_mir_transform/src/cross_crate_inline.rs +++ b/compiler/rustc_mir_transform/src/cross_crate_inline.rs @@ -135,7 +135,16 @@ impl<'tcx> Visitor<'tcx> for CostChecker<'_, 'tcx> { } } } - TerminatorKind::Call { unwind, .. } => { + TerminatorKind::Call { ref func, unwind, .. } => { + // We track calls because they make our function not a leaf (and in theory, the + // number of calls indicates how likely this function is to perturb other CGUs). + // But intrinsics don't have a body that gets assigned to a CGU, so they are + // ignored. + if let Some((fn_def_id, _)) = func.const_fn_def() + && self.tcx.has_attr(fn_def_id, sym::rustc_intrinsic) + { + return; + } self.calls += 1; if let UnwindAction::Cleanup(_) = unwind { self.landing_pads += 1; diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs index cf7425251e8..9ba2d274691 100644 --- a/compiler/rustc_mir_transform/src/dest_prop.rs +++ b/compiler/rustc_mir_transform/src/dest_prop.rs @@ -59,6 +59,12 @@ //! The first two conditions are simple structural requirements on the `Assign` statements that can //! be trivially checked. The third requirement however is more difficult and costly to check. //! +//! ## Current implementation +//! +//! The current implementation relies on live range computation to check for conflicts. We only +//! allow to merge locals that have disjoint live ranges. The live range are defined with +//! half-statement granularity, so as to make all writes be live for at least a half statement. +//! //! ## Future Improvements //! //! There are a number of ways in which this pass could be improved in the future: @@ -117,9 +123,8 @@ //! - Layout optimizations for coroutines have been added to improve code generation for //! async/await, which are very similar in spirit to what this optimization does. //! -//! Also, rustc now has a simple NRVO pass (see `nrvo.rs`), which handles a subset of the cases that -//! 
this destination propagation pass handles, proving that similar optimizations can be performed -//! on MIR. +//! [The next approach][attempt 4] computes a conflict matrix between locals by forbidding merging +//! locals with competing writes or with one write while the other is live. //! //! ## Pre/Post Optimization //! @@ -130,20 +135,18 @@ //! [attempt 1]: https://github.com/rust-lang/rust/pull/47954 //! [attempt 2]: https://github.com/rust-lang/rust/pull/71003 //! [attempt 3]: https://github.com/rust-lang/rust/pull/72632 +//! [attempt 4]: https://github.com/rust-lang/rust/pull/96451 -use rustc_data_structures::fx::{FxIndexMap, IndexEntry, IndexOccupiedEntry}; +use rustc_data_structures::union_find::UnionFind; use rustc_index::bit_set::DenseBitSet; use rustc_index::interval::SparseIntervalMatrix; -use rustc_middle::bug; +use rustc_index::{IndexVec, newtype_index}; use rustc_middle::mir::visit::{MutVisitor, PlaceContext, Visitor}; -use rustc_middle::mir::{ - Body, HasLocalDecls, InlineAsmOperand, Local, LocalKind, Location, MirDumper, Operand, - PassWhere, Place, Rvalue, Statement, StatementKind, TerminatorKind, traversal, -}; +use rustc_middle::mir::*; use rustc_middle::ty::TyCtxt; -use rustc_mir_dataflow::Analysis; -use rustc_mir_dataflow::impls::MaybeLiveLocals; -use rustc_mir_dataflow::points::{DenseLocationMap, PointIndex, save_as_intervals}; +use rustc_mir_dataflow::impls::{DefUse, MaybeLiveLocals}; +use rustc_mir_dataflow::points::DenseLocationMap; +use rustc_mir_dataflow::{Analysis, Results}; use tracing::{debug, trace}; pub(super) struct DestinationPropagation; @@ -161,84 +164,81 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation { sess.mir_opt_level() >= 3 } + #[tracing::instrument(level = "trace", skip(self, tcx, body))] fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { let def_id = body.source.def_id(); - let mut candidates = Candidates::default(); - let mut write_info = WriteInfo::default(); - trace!(func = ?tcx.def_path_str(def_id)); + trace!(?def_id); let borrowed = rustc_mir_dataflow::impls::borrowed_locals(body); + let candidates = Candidates::find(body, &borrowed); + trace!(?candidates); + if candidates.c.is_empty() { + return; + } + let live = MaybeLiveLocals.iterate_to_fixpoint(tcx, body, Some("MaybeLiveLocals-DestProp")); + let points = DenseLocationMap::new(body); - let mut live = save_as_intervals(&points, body, live.analysis, live.results); - - // In order to avoid having to collect data for every single pair of locals in the body, we - // do not allow doing more than one merge for places that are derived from the same local at - // once. To avoid missed opportunities, we instead iterate to a fixed point - we'll refer to - // each of these iterations as a "round." - // - // Reaching a fixed point could in theory take up to `min(l, s)` rounds - however, we do not - // expect to see MIR like that. To verify this, a test was run against `[rust-lang/regex]` - - // the average MIR body saw 1.32 full iterations of this loop. The most that was hit were 30 - // for a single function. Only 80/2801 (2.9%) of functions saw at least 5. - // - // [rust-lang/regex]: - // https://github.com/rust-lang/regex/tree/b5372864e2df6a2f5e543a556a62197f50ca3650 - let mut round_count = 0; - loop { - // PERF: Can we do something smarter than recalculating the candidates and liveness - // results? 
- candidates.reset_and_find(body, &borrowed); - trace!(?candidates); - dest_prop_mir_dump(tcx, body, &points, &live, round_count); - - FilterInformation::filter_liveness( - &mut candidates, - &points, - &live, - &mut write_info, - body, - ); - - // Because we only filter once per round, it is unsound to use a local for more than - // one merge operation within a single round of optimizations. We store here which ones - // we have already used. - let mut merged_locals: DenseBitSet<Local> = - DenseBitSet::new_empty(body.local_decls.len()); - - // This is the set of merges we will apply this round. It is a subset of the candidates. - let mut merges = FxIndexMap::default(); - - for (src, candidates) in candidates.c.iter() { - if merged_locals.contains(*src) { - continue; - } - let Some(dest) = candidates.iter().find(|dest| !merged_locals.contains(**dest)) - else { - continue; - }; - - // Replace `src` by `dest` everywhere. - merges.insert(*src, *dest); - merged_locals.insert(*src); - merged_locals.insert(*dest); - - // Update liveness information based on the merge we just performed. - // Every location where `src` was live, `dest` will be live. - live.union_rows(*src, *dest); + let mut relevant = RelevantLocals::compute(&candidates, body.local_decls.len()); + let mut live = save_as_intervals(&points, body, &relevant, live.results); + + dest_prop_mir_dump(tcx, body, &points, &live, &relevant); + + let mut merged_locals = DenseBitSet::new_empty(body.local_decls.len()); + + for (src, dst) in candidates.c.into_iter() { + trace!(?src, ?dst); + + let Some(mut src) = relevant.find(src) else { continue }; + let Some(mut dst) = relevant.find(dst) else { continue }; + if src == dst { + continue; } - trace!(merging = ?merges); - if merges.is_empty() { - break; + let Some(src_live_ranges) = live.row(src) else { continue }; + let Some(dst_live_ranges) = live.row(dst) else { continue }; + trace!(?src, ?src_live_ranges); + trace!(?dst, ?dst_live_ranges); + + if src_live_ranges.disjoint(dst_live_ranges) { + // We want to replace `src` by `dst`. + let mut orig_src = relevant.original[src]; + let mut orig_dst = relevant.original[dst]; + + // The return place and function arguments are required and cannot be renamed. + // This check cannot be made during candidate collection, as we may want to + // unify the same non-required local with several required locals. + match (is_local_required(orig_src, body), is_local_required(orig_dst, body)) { + // Renaming `src` is ok. + (false, _) => {} + // Renaming `src` is wrong, but renaming `dst` is ok. + (true, false) => { + std::mem::swap(&mut src, &mut dst); + std::mem::swap(&mut orig_src, &mut orig_dst); + } + // Neither local can be renamed, so skip this case. + (true, true) => continue, + } + + trace!(?src, ?dst, "merge"); + merged_locals.insert(orig_src); + merged_locals.insert(orig_dst); + + // Replace `src` by `dst`. + let head = relevant.union(src, dst); + live.union_rows(/* read */ src, /* write */ head); + live.union_rows(/* read */ dst, /* write */ head); } - round_count += 1; + } + trace!(?merged_locals); + trace!(?relevant.renames); - apply_merges(body, tcx, merges, merged_locals); + if merged_locals.is_empty() { + return; } - trace!(round_count); + apply_merges(body, tcx, relevant, merged_locals); } fn is_required(&self) -> bool { @@ -246,30 +246,6 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation { } } -#[derive(Debug, Default)] -struct Candidates { - /// The set of candidates we are considering in this optimization. 
- /// - /// We will always merge the key into at most one of its values. - /// - /// Whether a place ends up in the key or the value does not correspond to whether it appears as - /// the lhs or rhs of any assignment. As a matter of fact, the places in here might never appear - /// in an assignment at all. This happens because if we see an assignment like this: - /// - /// ```ignore (syntax-highlighting-only) - /// _1.0 = _2.0 - /// ``` - /// - /// We will still report that we would like to merge `_1` and `_2` in an attempt to allow us to - /// remove that assignment. - c: FxIndexMap<Local, Vec<Local>>, - - /// A reverse index of the `c` set; if the `c` set contains `a => Place { local: b, proj }`, - /// then this contains `b => a`. - // PERF: Possibly these should be `SmallVec`s? - reverse: FxIndexMap<Local, Vec<Local>>, -} - ////////////////////////////////////////////////////////// // Merging // @@ -278,16 +254,16 @@ struct Candidates { fn apply_merges<'tcx>( body: &mut Body<'tcx>, tcx: TyCtxt<'tcx>, - merges: FxIndexMap<Local, Local>, + relevant: RelevantLocals, merged_locals: DenseBitSet<Local>, ) { - let mut merger = Merger { tcx, merges, merged_locals }; + let mut merger = Merger { tcx, relevant, merged_locals }; merger.visit_body_preserves_cfg(body); } struct Merger<'tcx> { tcx: TyCtxt<'tcx>, - merges: FxIndexMap<Local, Local>, + relevant: RelevantLocals, merged_locals: DenseBitSet<Local>, } @@ -297,8 +273,8 @@ impl<'tcx> MutVisitor<'tcx> for Merger<'tcx> { } fn visit_local(&mut self, local: &mut Local, _: PlaceContext, _location: Location) { - if let Some(dest) = self.merges.get(local) { - *local = *dest; + if let Some(relevant) = self.relevant.find(*local) { + *local = self.relevant.original[relevant]; } } @@ -336,414 +312,95 @@ impl<'tcx> MutVisitor<'tcx> for Merger<'tcx> { } ////////////////////////////////////////////////////////// -// Liveness filtering +// Relevant locals // -// This section enforces bullet point 2 +// Small utility to reduce size of the conflict matrix by only considering locals that appear in +// the candidates -struct FilterInformation<'a, 'tcx> { - body: &'a Body<'tcx>, - points: &'a DenseLocationMap, - live: &'a SparseIntervalMatrix<Local, PointIndex>, - candidates: &'a mut Candidates, - write_info: &'a mut WriteInfo, - at: Location, +newtype_index! { + /// Represent a subset of locals which appear in candidates. + struct RelevantLocal {} } -// We first implement some utility functions which we will expose removing candidates according to -// different needs. Throughout the liveness filtering, the `candidates` are only ever accessed -// through these methods, and not directly. -impl Candidates { - /// Collects the candidates for merging. - /// - /// This is responsible for enforcing the first and third bullet point. - fn reset_and_find<'tcx>(&mut self, body: &Body<'tcx>, borrowed: &DenseBitSet<Local>) { - self.c.clear(); - self.reverse.clear(); - let mut visitor = FindAssignments { body, candidates: &mut self.c, borrowed }; - visitor.visit_body(body); - // Deduplicate candidates. - for (_, cands) in self.c.iter_mut() { - cands.sort(); - cands.dedup(); - } - // Generate the reverse map. 
- for (src, cands) in self.c.iter() { - for dest in cands.iter().copied() { - self.reverse.entry(dest).or_default().push(*src); - } - } - } - - /// Just `Vec::retain`, but the condition is inverted and we add debugging output - fn vec_filter_candidates( - src: Local, - v: &mut Vec<Local>, - mut f: impl FnMut(Local) -> CandidateFilter, - at: Location, - ) { - v.retain(|dest| { - let remove = f(*dest); - if remove == CandidateFilter::Remove { - trace!("eliminating {:?} => {:?} due to conflict at {:?}", src, dest, at); - } - remove == CandidateFilter::Keep - }); - } - - /// `vec_filter_candidates` but for an `Entry` - fn entry_filter_candidates( - mut entry: IndexOccupiedEntry<'_, Local, Vec<Local>>, - p: Local, - f: impl FnMut(Local) -> CandidateFilter, - at: Location, - ) { - let candidates = entry.get_mut(); - Self::vec_filter_candidates(p, candidates, f, at); - if candidates.len() == 0 { - // FIXME(#120456) - is `swap_remove` correct? - entry.swap_remove(); - } - } - - /// For all candidates `(p, q)` or `(q, p)` removes the candidate if `f(q)` says to do so - fn filter_candidates_by( - &mut self, - p: Local, - mut f: impl FnMut(Local) -> CandidateFilter, - at: Location, - ) { - // Cover the cases where `p` appears as a `src` - if let IndexEntry::Occupied(entry) = self.c.entry(p) { - Self::entry_filter_candidates(entry, p, &mut f, at); - } - // And the cases where `p` appears as a `dest` - let Some(srcs) = self.reverse.get_mut(&p) else { - return; - }; - // We use `retain` here to remove the elements from the reverse set if we've removed the - // matching candidate in the forward set. - srcs.retain(|src| { - if f(*src) == CandidateFilter::Keep { - return true; - } - let IndexEntry::Occupied(entry) = self.c.entry(*src) else { - return false; - }; - Self::entry_filter_candidates( - entry, - *src, - |dest| { - if dest == p { CandidateFilter::Remove } else { CandidateFilter::Keep } - }, - at, - ); - false - }); - } +#[derive(Debug)] +struct RelevantLocals { + original: IndexVec<RelevantLocal, Local>, + shrink: IndexVec<Local, Option<RelevantLocal>>, + renames: UnionFind<RelevantLocal>, } -#[derive(Copy, Clone, PartialEq, Eq)] -enum CandidateFilter { - Keep, - Remove, -} +impl RelevantLocals { + #[tracing::instrument(level = "trace", skip(candidates, num_locals), ret)] + fn compute(candidates: &Candidates, num_locals: usize) -> RelevantLocals { + let mut original = IndexVec::with_capacity(candidates.c.len()); + let mut shrink = IndexVec::from_elem_n(None, num_locals); -impl<'a, 'tcx> FilterInformation<'a, 'tcx> { - /// Filters the set of candidates to remove those that conflict. - /// - /// The steps we take are exactly those that are outlined at the top of the file. For each - /// statement/terminator, we collect the set of locals that are written to in that - /// statement/terminator, and then we remove all pairs of candidates that contain one such local - /// and another one that is live. - /// - /// We need to be careful about the ordering of operations within each statement/terminator - /// here. Many statements might write and read from more than one place, and we need to consider - /// them all. The strategy for doing this is as follows: We first gather all the places that are - /// written to within the statement/terminator via `WriteInfo`. 
Then, we use the liveness - /// analysis from *before* the statement/terminator (in the control flow sense) to eliminate - /// candidates - this is because we want to conservatively treat a pair of locals that is both - /// read and written in the statement/terminator to be conflicting, and the liveness analysis - /// before the statement/terminator will correctly report locals that are read in the - /// statement/terminator to be live. We are additionally conservative by treating all written to - /// locals as also being read from. - fn filter_liveness( - candidates: &mut Candidates, - points: &DenseLocationMap, - live: &SparseIntervalMatrix<Local, PointIndex>, - write_info: &mut WriteInfo, - body: &Body<'tcx>, - ) { - let mut this = FilterInformation { - body, - points, - live, - candidates, - // We don't actually store anything at this scope, we just keep things here to be able - // to reuse the allocation. - write_info, - // Doesn't matter what we put here, will be overwritten before being used - at: Location::START, + // Mark a local as relevant and record it into the maps. + let mut declare = |local| { + shrink.get_or_insert_with(local, || original.push(local)); }; - this.internal_filter_liveness(); - } - fn internal_filter_liveness(&mut self) { - for (block, data) in traversal::preorder(self.body) { - self.at = Location { block, statement_index: data.statements.len() }; - self.write_info.for_terminator(&data.terminator().kind); - self.apply_conflicts(); - - for (i, statement) in data.statements.iter().enumerate().rev() { - self.at = Location { block, statement_index: i }; - self.write_info.for_statement(&statement.kind, self.body); - self.apply_conflicts(); - } + for &(src, dest) in candidates.c.iter() { + declare(src); + declare(dest) } - } - fn apply_conflicts(&mut self) { - let writes = &self.write_info.writes; - for p in writes { - let other_skip = self.write_info.skip_pair.and_then(|(a, b)| { - if a == *p { - Some(b) - } else if b == *p { - Some(a) - } else { - None - } - }); - let at = self.points.point_from_location(self.at); - self.candidates.filter_candidates_by( - *p, - |q| { - if Some(q) == other_skip { - return CandidateFilter::Keep; - } - // It is possible that a local may be live for less than the - // duration of a statement This happens in the case of function - // calls or inline asm. Because of this, we also mark locals as - // conflicting when both of them are written to in the same - // statement. - if self.live.contains(q, at) || writes.contains(&q) { - CandidateFilter::Remove - } else { - CandidateFilter::Keep - } - }, - self.at, - ); - } + let renames = UnionFind::new(original.len()); + RelevantLocals { original, shrink, renames } } -} -/// Describes where a statement/terminator writes to -#[derive(Default, Debug)] -struct WriteInfo { - writes: Vec<Local>, - /// If this pair of locals is a candidate pair, completely skip processing it during this - /// statement. All other candidates are unaffected. 
- skip_pair: Option<(Local, Local)>, -} - -impl WriteInfo { - fn for_statement<'tcx>(&mut self, statement: &StatementKind<'tcx>, body: &Body<'tcx>) { - self.reset(); - match statement { - StatementKind::Assign(box (lhs, rhs)) => { - self.add_place(*lhs); - match rhs { - Rvalue::Use(op) => { - self.add_operand(op); - self.consider_skipping_for_assign_use(*lhs, op, body); - } - Rvalue::Repeat(op, _) => { - self.add_operand(op); - } - Rvalue::Cast(_, op, _) - | Rvalue::UnaryOp(_, op) - | Rvalue::ShallowInitBox(op, _) => { - self.add_operand(op); - } - Rvalue::BinaryOp(_, ops) => { - for op in [&ops.0, &ops.1] { - self.add_operand(op); - } - } - Rvalue::Aggregate(_, ops) => { - for op in ops { - self.add_operand(op); - } - } - Rvalue::WrapUnsafeBinder(op, _) => { - self.add_operand(op); - } - Rvalue::ThreadLocalRef(_) - | Rvalue::NullaryOp(_, _) - | Rvalue::Ref(_, _, _) - | Rvalue::RawPtr(_, _) - | Rvalue::Len(_) - | Rvalue::Discriminant(_) - | Rvalue::CopyForDeref(_) => {} - } - } - // Retags are technically also reads, but reporting them as a write suffices - StatementKind::SetDiscriminant { place, .. } - | StatementKind::Deinit(place) - | StatementKind::Retag(_, place) => { - self.add_place(**place); - } - StatementKind::Intrinsic(_) - | StatementKind::ConstEvalCounter - | StatementKind::Nop - | StatementKind::Coverage(_) - | StatementKind::StorageLive(_) - | StatementKind::StorageDead(_) - | StatementKind::BackwardIncompatibleDropHint { .. } - | StatementKind::PlaceMention(_) => {} - StatementKind::FakeRead(_) | StatementKind::AscribeUserType(_, _) => { - bug!("{:?} not found in this MIR phase", statement) - } - } + fn find(&mut self, src: Local) -> Option<RelevantLocal> { + let src = self.shrink[src]?; + let src = self.renames.find(src); + Some(src) } - fn consider_skipping_for_assign_use<'tcx>( - &mut self, - lhs: Place<'tcx>, - rhs: &Operand<'tcx>, - body: &Body<'tcx>, - ) { - let Some(rhs) = rhs.place() else { return }; - if let Some(pair) = places_to_candidate_pair(lhs, rhs, body) { - self.skip_pair = Some(pair); - } - } - - fn for_terminator<'tcx>(&mut self, terminator: &TerminatorKind<'tcx>) { - self.reset(); - match terminator { - TerminatorKind::SwitchInt { discr: op, .. } - | TerminatorKind::Assert { cond: op, .. } => { - self.add_operand(op); - } - TerminatorKind::Call { destination, func, args, .. } => { - self.add_place(*destination); - self.add_operand(func); - for arg in args { - self.add_operand(&arg.node); - } - } - TerminatorKind::TailCall { func, args, .. } => { - self.add_operand(func); - for arg in args { - self.add_operand(&arg.node); - } - } - TerminatorKind::InlineAsm { operands, .. } => { - for asm_operand in operands { - match asm_operand { - InlineAsmOperand::In { value, .. } => { - self.add_operand(value); - } - InlineAsmOperand::Out { place, .. } => { - if let Some(place) = place { - self.add_place(*place); - } - } - // Note that the `late` field in `InOut` is about whether the registers used - // for these things overlap, and is of absolutely no interest to us. - InlineAsmOperand::InOut { in_value, out_place, .. } => { - if let Some(place) = out_place { - self.add_place(*place); - } - self.add_operand(in_value); - } - InlineAsmOperand::Const { .. } - | InlineAsmOperand::SymFn { .. } - | InlineAsmOperand::SymStatic { .. } - | InlineAsmOperand::Label { .. } => {} - } - } - } - TerminatorKind::Goto { .. } - | TerminatorKind::UnwindResume - | TerminatorKind::UnwindTerminate(_) - | TerminatorKind::Return - | TerminatorKind::Unreachable { .. 
} => (), - TerminatorKind::Drop { .. } => { - // `Drop`s create a `&mut` and so are not considered - } - TerminatorKind::Yield { .. } - | TerminatorKind::CoroutineDrop - | TerminatorKind::FalseEdge { .. } - | TerminatorKind::FalseUnwind { .. } => { - bug!("{:?} not found in this MIR phase", terminator) - } - } - } - - fn add_place(&mut self, place: Place<'_>) { - self.writes.push(place.local); - } - - fn add_operand<'tcx>(&mut self, op: &Operand<'tcx>) { - match op { - // FIXME(JakobDegen): In a previous version, the `Move` case was incorrectly treated as - // being a read only. This was unsound, however we cannot add a regression test because - // it is not possible to set this off with current MIR. Once we have that ability, a - // regression test should be added. - Operand::Move(p) => self.add_place(*p), - Operand::Copy(_) | Operand::Constant(_) => (), - } - } - - fn reset(&mut self) { - self.writes.clear(); - self.skip_pair = None; + fn union(&mut self, lhs: RelevantLocal, rhs: RelevantLocal) -> RelevantLocal { + let head = self.renames.unify(lhs, rhs); + // We need to ensure we keep the original local of the RHS, as it may be a required local. + self.original[head] = self.original[rhs]; + head } } ///////////////////////////////////////////////////// // Candidate accumulation -/// If the pair of places is being considered for merging, returns the candidate which would be -/// merged in order to accomplish this. -/// -/// The contract here is in one direction - there is a guarantee that merging the locals that are -/// outputted by this function would result in an assignment between the inputs becoming a -/// self-assignment. However, there is no guarantee that the returned pair is actually suitable for -/// merging - candidate collection must still check this independently. -/// -/// This output is unique for each unordered pair of input places. -fn places_to_candidate_pair<'tcx>( - a: Place<'tcx>, - b: Place<'tcx>, - body: &Body<'tcx>, -) -> Option<(Local, Local)> { - let (mut a, mut b) = if a.projection.len() == 0 && b.projection.len() == 0 { - (a.local, b.local) - } else { - return None; - }; +#[derive(Debug, Default)] +struct Candidates { + /// The set of candidates we are considering in this optimization. + /// + /// Whether a place ends up in the key or the value does not correspond to whether it appears as + /// the lhs or rhs of any assignment. As a matter of fact, the places in here might never appear + /// in an assignment at all. This happens because if we see an assignment like this: + /// + /// ```ignore (syntax-highlighting-only) + /// _1.0 = _2.0 + /// ``` + /// + /// We will still report that we would like to merge `_1` and `_2` in an attempt to allow us to + /// remove that assignment. + c: Vec<(Local, Local)>, +} - // By sorting, we make sure we're input order independent - if a > b { - std::mem::swap(&mut a, &mut b); - } +// We first implement some utility functions which we will expose removing candidates according to +// different needs. Throughout the liveness filtering, the `candidates` are only ever accessed +// through these methods, and not directly. +impl Candidates { + /// Collects the candidates for merging. + /// + /// This is responsible for enforcing the first and third bullet point. 
+ fn find(body: &Body<'_>, borrowed: &DenseBitSet<Local>) -> Candidates { + let mut visitor = FindAssignments { body, candidates: Default::default(), borrowed }; + visitor.visit_body(body); - // We could now return `(a, b)`, but then we miss some candidates in the case where `a` can't be - // used as a `src`. - if is_local_required(a, body) { - std::mem::swap(&mut a, &mut b); + Candidates { c: visitor.candidates } } - // We could check `is_local_required` again here, but there's no need - after all, we make no - // promise that the candidate pair is actually valid - Some((a, b)) } struct FindAssignments<'a, 'tcx> { body: &'a Body<'tcx>, - candidates: &'a mut FxIndexMap<Local, Vec<Local>>, + candidates: Vec<(Local, Local)>, borrowed: &'a DenseBitSet<Local>, } @@ -753,11 +410,9 @@ impl<'tcx> Visitor<'tcx> for FindAssignments<'_, 'tcx> { lhs, Rvalue::CopyForDeref(rhs) | Rvalue::Use(Operand::Copy(rhs) | Operand::Move(rhs)), )) = &statement.kind + && let Some(src) = lhs.as_local() + && let Some(dest) = rhs.as_local() { - let Some((src, dest)) = places_to_candidate_pair(*lhs, *rhs, self.body) else { - return; - }; - // As described at the top of the file, we do not go near things that have // their address taken. if self.borrowed.contains(src) || self.borrowed.contains(dest) { @@ -774,13 +429,8 @@ impl<'tcx> Visitor<'tcx> for FindAssignments<'_, 'tcx> { return; } - // Also, we need to make sure that MIR actually allows the `src` to be removed - if is_local_required(src, self.body) { - return; - } - // We may insert duplicates here, but that's fine - self.candidates.entry(src).or_default().push(dest); + self.candidates.push((src, dest)); } } } @@ -803,22 +453,162 @@ fn dest_prop_mir_dump<'tcx>( tcx: TyCtxt<'tcx>, body: &Body<'tcx>, points: &DenseLocationMap, - live: &SparseIntervalMatrix<Local, PointIndex>, - round: usize, + live: &SparseIntervalMatrix<RelevantLocal, TwoStepIndex>, + relevant: &RelevantLocals, ) { let locals_live_at = |location| { - let location = points.point_from_location(location); - live.rows().filter(|&r| live.contains(r, location)).collect::<Vec<_>>() + live.rows() + .filter(|&r| live.contains(r, location)) + .map(|rl| relevant.original[rl]) + .collect::<Vec<_>>() }; if let Some(dumper) = MirDumper::new(tcx, "DestinationPropagation-dataflow", body) { let extra_data = &|pass_where, w: &mut dyn std::io::Write| { if let PassWhere::BeforeLocation(loc) = pass_where { - writeln!(w, " // live: {:?}", locals_live_at(loc))?; + let location = TwoStepIndex::new(points, loc, Effect::Before); + let live = locals_live_at(location); + writeln!(w, " // before: {:?} => {:?}", location, live)?; + } + if let PassWhere::AfterLocation(loc) = pass_where { + let location = TwoStepIndex::new(points, loc, Effect::After); + let live = locals_live_at(location); + writeln!(w, " // after: {:?} => {:?}", location, live)?; } Ok(()) }; - dumper.set_disambiguator(&round).set_extra_data(extra_data).dump_mir(body) + dumper.set_extra_data(extra_data).dump_mir(body) } } + +#[derive(Copy, Clone, Debug)] +enum Effect { + Before, + After, +} + +rustc_index::newtype_index! { + /// A reversed `PointIndex` but with the lower bit encoding early/late inside the statement. + /// The reversed order allows to use the more efficient `IntervalSet::append` method while we + /// iterate on the statements in reverse order. 
+ #[orderable] + #[debug_format = "TwoStepIndex({})"] + struct TwoStepIndex {} +} + +impl TwoStepIndex { + fn new(elements: &DenseLocationMap, location: Location, effect: Effect) -> TwoStepIndex { + let point = elements.point_from_location(location); + let effect = match effect { + Effect::Before => 0, + Effect::After => 1, + }; + let max_index = 2 * elements.num_points() as u32 - 1; + let index = 2 * point.as_u32() + (effect as u32); + // Reverse the indexing to use more efficient `IntervalSet::append`. + TwoStepIndex::from_u32(max_index - index) + } +} + +struct VisitPlacesWith<F>(F); + +impl<'tcx, F> Visitor<'tcx> for VisitPlacesWith<F> +where + F: FnMut(Place<'tcx>, PlaceContext), +{ + fn visit_local(&mut self, local: Local, ctxt: PlaceContext, _: Location) { + (self.0)(local.into(), ctxt); + } + + fn visit_place(&mut self, place: &Place<'tcx>, ctxt: PlaceContext, location: Location) { + (self.0)(*place, ctxt); + self.visit_projection(place.as_ref(), ctxt, location); + } +} + +/// Add points depending on the result of the given dataflow analysis. +fn save_as_intervals<'tcx>( + elements: &DenseLocationMap, + body: &Body<'tcx>, + relevant: &RelevantLocals, + results: Results<DenseBitSet<Local>>, +) -> SparseIntervalMatrix<RelevantLocal, TwoStepIndex> { + let mut values = SparseIntervalMatrix::new(2 * elements.num_points()); + let mut state = MaybeLiveLocals.bottom_value(body); + let reachable_blocks = traversal::reachable_as_bitset(body); + + let two_step_loc = |location, effect| TwoStepIndex::new(elements, location, effect); + let append_at = + |values: &mut SparseIntervalMatrix<_, _>, state: &DenseBitSet<Local>, twostep| { + for (relevant, &original) in relevant.original.iter_enumerated() { + if state.contains(original) { + values.append(relevant, twostep); + } + } + }; + + // Iterate blocks in decreasing order, to visit locations in decreasing order. This + // allows to use the more efficient `append` method to interval sets. + for block in body.basic_blocks.indices().rev() { + if !reachable_blocks.contains(block) { + continue; + } + + state.clone_from(&results[block]); + + let block_data = &body.basic_blocks[block]; + let loc = Location { block, statement_index: block_data.statements.len() }; + + let term = block_data.terminator(); + let mut twostep = two_step_loc(loc, Effect::After); + append_at(&mut values, &state, twostep); + // Ensure we have a non-zero live range even for dead stores. This is done by marking all + // the written-to locals as live in the second half of the statement. + // We also ensure that operands read by terminators conflict with writes by that terminator. + // For instance a function call may read args after having written to the destination. 
+ VisitPlacesWith(|place, ctxt| match DefUse::for_place(place, ctxt) { + DefUse::Def | DefUse::Use | DefUse::PartialWrite => { + if let Some(relevant) = relevant.shrink[place.local] { + values.insert(relevant, twostep); + } + } + DefUse::NonUse => {} + }) + .visit_terminator(term, loc); + + twostep = TwoStepIndex::from_u32(twostep.as_u32() + 1); + debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before)); + MaybeLiveLocals.apply_early_terminator_effect(&mut state, term, loc); + MaybeLiveLocals.apply_primary_terminator_effect(&mut state, term, loc); + append_at(&mut values, &state, twostep); + + for (statement_index, stmt) in block_data.statements.iter().enumerate().rev() { + let loc = Location { block, statement_index }; + twostep = TwoStepIndex::from_u32(twostep.as_u32() + 1); + debug_assert_eq!(twostep, two_step_loc(loc, Effect::After)); + append_at(&mut values, &state, twostep); + // Ensure we have a non-zero live range even for dead stores. This is done by marking + // all the written-to locals as live in the second half of the statement. + VisitPlacesWith(|place, ctxt| match DefUse::for_place(place, ctxt) { + DefUse::Def | DefUse::PartialWrite => { + if let Some(relevant) = relevant.shrink[place.local] { + values.insert(relevant, twostep); + } + } + DefUse::Use | DefUse::NonUse => {} + }) + .visit_statement(stmt, loc); + + twostep = TwoStepIndex::from_u32(twostep.as_u32() + 1); + debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before)); + MaybeLiveLocals.apply_early_statement_effect(&mut state, stmt, loc); + MaybeLiveLocals.apply_primary_statement_effect(&mut state, stmt, loc); + // ... but reads from operands are marked as live here so they do not conflict with + // the all the writes we manually marked as live in the second half of the statement. + append_at(&mut values, &state, twostep); + } + } + + values +} diff --git a/compiler/rustc_mir_transform/src/gvn.rs b/compiler/rustc_mir_transform/src/gvn.rs index 9da6e034c3b..f867c130efb 100644 --- a/compiler/rustc_mir_transform/src/gvn.rs +++ b/compiler/rustc_mir_transform/src/gvn.rs @@ -896,18 +896,13 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { fn simplify_aggregate_to_copy( &mut self, - lhs: &Place<'tcx>, - rvalue: &mut Rvalue<'tcx>, - location: Location, - fields: &[VnIndex], + ty: Ty<'tcx>, variant_index: VariantIdx, + fields: &[VnIndex], ) -> Option<VnIndex> { - let Some(&first_field) = fields.first() else { - return None; - }; - let Value::Projection(copy_from_value, _) = *self.get(first_field) else { - return None; - }; + let Some(&first_field) = fields.first() else { return None }; + let Value::Projection(copy_from_value, _) = *self.get(first_field) else { return None }; + // All fields must correspond one-to-one and come from the same aggregate value. if fields.iter().enumerate().any(|(index, &v)| { if let Value::Projection(pointer, ProjectionElem::Field(from_index, _)) = *self.get(v) @@ -934,21 +929,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { } } - // Allow introducing places with non-constant offsets, as those are still better than - // reconstructing an aggregate. - if self.ty(copy_from_local_value) == rvalue.ty(self.local_decls, self.tcx) - && let Some(place) = self.try_as_place(copy_from_local_value, location, true) - { - // Avoid creating `*a = copy (*b)`, as they might be aliases resulting in overlapping assignments. - // FIXME: This also avoids any kind of projection, not just derefs. We can add allowed projections. 
- if lhs.as_local().is_some() { - self.reused_locals.insert(place.local); - *rvalue = Rvalue::Use(Operand::Copy(place)); - } - return Some(copy_from_local_value); - } - - None + // Both must be variants of the same type. + if self.ty(copy_from_local_value) == ty { Some(copy_from_local_value) } else { None } } fn simplify_aggregate( @@ -1035,9 +1017,16 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { return Some(self.insert(ty, Value::Repeat(first, len))); } - if let Some(value) = - self.simplify_aggregate_to_copy(lhs, rvalue, location, &fields, variant_index) - { + if let Some(value) = self.simplify_aggregate_to_copy(ty, variant_index, &fields) { + // Allow introducing places with non-constant offsets, as those are still better than + // reconstructing an aggregate. But avoid creating `*a = copy (*b)`, as they might be + // aliases resulting in overlapping assignments. + let allow_complex_projection = + lhs.projection[..].iter().all(PlaceElem::is_stable_offset); + if let Some(place) = self.try_as_place(value, location, allow_complex_projection) { + self.reused_locals.insert(place.local); + *rvalue = Rvalue::Use(Operand::Copy(place)); + } return Some(value); } diff --git a/compiler/rustc_mir_transform/src/ssa.rs b/compiler/rustc_mir_transform/src/ssa.rs index cd9a7f4a39d..73c249a3c8c 100644 --- a/compiler/rustc_mir_transform/src/ssa.rs +++ b/compiler/rustc_mir_transform/src/ssa.rs @@ -225,6 +225,9 @@ impl SsaVisitor<'_, '_> { impl<'tcx> Visitor<'tcx> for SsaVisitor<'_, 'tcx> { fn visit_local(&mut self, local: Local, ctxt: PlaceContext, loc: Location) { + if ctxt.may_observe_address() { + self.borrowed_locals.insert(local); + } match ctxt { PlaceContext::MutatingUse(MutatingUseContext::Projection) | PlaceContext::NonMutatingUse(NonMutatingUseContext::Projection) => bug!(), @@ -237,7 +240,6 @@ impl<'tcx> Visitor<'tcx> for SsaVisitor<'_, 'tcx> { PlaceContext::NonMutatingUse( NonMutatingUseContext::SharedBorrow | NonMutatingUseContext::FakeBorrow, ) => { - self.borrowed_locals.insert(local); self.check_dominates(local, loc); self.direct_uses[local] += 1; } diff --git a/library/core/src/num/f32.rs b/library/core/src/num/f32.rs index 4bb0db58fa5..cefcf1d1fe2 100644 --- a/library/core/src/num/f32.rs +++ b/library/core/src/num/f32.rs @@ -1946,8 +1946,8 @@ pub mod math { /// let abs_difference_x = (f32::math::abs_sub(x, 1.0) - 2.0).abs(); /// let abs_difference_y = (f32::math::abs_sub(y, 1.0) - 0.0).abs(); /// - /// assert!(abs_difference_x <= f32::EPSILON); - /// assert!(abs_difference_y <= f32::EPSILON); + /// assert!(abs_difference_x <= 1e-6); + /// assert!(abs_difference_y <= 1e-6); /// ``` /// /// _This standalone function is for testing only. @@ -1992,7 +1992,7 @@ pub mod math { /// // x^(1/3) - 2 == 0 /// let abs_difference = (f32::math::cbrt(x) - 2.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` /// /// _This standalone function is for testing only. 
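For readers following the DestinationPropagation rewrite above: the pass now merges two locals only when their live ranges, tracked per half-statement via TwoStepIndex and built with the new IntervalSet::append, are disjoint. The following standalone Rust sketch is illustrative only and is not part of the diff; it models the append/disjoint check with a plain Vec of inclusive intervals instead of the real SparseIntervalMatrix, and it uses forward-ordered points even though the actual pass reverses the index so that append can be used while walking statements backwards.

// Illustrative sketch only: a minimal model of the "merge locals with
// disjoint live ranges" rule used by the rewritten DestinationPropagation.
#[derive(Debug, Default)]
struct LiveRange {
    // Inclusive (start, end) pairs, kept sorted and non-overlapping.
    intervals: Vec<(u32, u32)>,
}

impl LiveRange {
    // Like IntervalSet::append: the point must not precede the last interval.
    fn append(&mut self, point: u32) {
        if let Some((_, last_end)) = self.intervals.last_mut() {
            assert!(*last_end <= point);
            if point == *last_end {
                // The point is already in the set.
            } else if point == *last_end + 1 {
                // Extend the trailing interval.
                *last_end = point;
            } else {
                self.intervals.push((point, point));
            }
        } else {
            self.intervals.push((point, point));
        }
    }

    // Like IntervalSet::disjoint: true when no interval of `self` overlaps `other`.
    fn disjoint(&self, other: &LiveRange) -> bool {
        let mut a = self.intervals.iter().peekable();
        let mut b = other.intervals.iter().peekable();
        while let (Some(&&(sa, ea)), Some(&&(sb, eb))) = (a.peek(), b.peek()) {
            if ea < sb {
                a.next();
            } else if eb < sa {
                b.next();
            } else {
                // Overlapping point: the candidate pair must be rejected.
                return false;
            }
        }
        true
    }
}

fn main() {
    // `_1` live at points 0..=3 and `_2` live at points 5..=6: safe to merge.
    let mut l1 = LiveRange::default();
    (0..=3).for_each(|p| l1.append(p));
    let mut l2 = LiveRange::default();
    (5..=6).for_each(|p| l2.append(p));
    assert!(l1.disjoint(&l2));

    // An overlap at point 3 means the candidate pair conflicts.
    let mut l3 = LiveRange::default();
    (3..=4).for_each(|p| l3.append(p));
    assert!(!l1.disjoint(&l3));
    println!("disjoint-live-range merge check ok");
}

In the actual pass, RelevantLocals shrinks the index space to locals that appear in candidate pairs and a UnionFind records the merges that have been applied.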
diff --git a/library/std/src/fs/tests.rs b/library/std/src/fs/tests.rs index f4431cedd18..f8dfb0d6334 100644 --- a/library/std/src/fs/tests.rs +++ b/library/std/src/fs/tests.rs @@ -226,6 +226,7 @@ fn file_test_io_seek_and_write() { target_os = "freebsd", target_os = "linux", target_os = "netbsd", + target_os = "solaris", target_vendor = "apple", ))] fn file_lock_multiple_shared() { @@ -249,6 +250,7 @@ fn file_lock_multiple_shared() { target_os = "freebsd", target_os = "linux", target_os = "netbsd", + target_os = "solaris", target_vendor = "apple", ))] fn file_lock_blocking() { @@ -273,6 +275,7 @@ fn file_lock_blocking() { target_os = "freebsd", target_os = "linux", target_os = "netbsd", + target_os = "solaris", target_vendor = "apple", ))] fn file_lock_drop() { @@ -294,6 +297,7 @@ fn file_lock_drop() { target_os = "freebsd", target_os = "linux", target_os = "netbsd", + target_os = "solaris", target_vendor = "apple", ))] fn file_lock_dup() { diff --git a/library/std/src/num/f128.rs b/library/std/src/num/f128.rs index 64e604e35f7..b83692390b6 100644 --- a/library/std/src/num/f128.rs +++ b/library/std/src/num/f128.rs @@ -467,10 +467,10 @@ impl f128 { /// # #[cfg(not(miri))] /// # #[cfg(target_has_reliable_f128_math)] { /// - /// let f = std::f128::consts::FRAC_PI_2; + /// let f = std::f128::consts::FRAC_PI_4; /// /// // asin(sin(pi/2)) - /// let abs_difference = (f.sin().asin() - std::f128::consts::FRAC_PI_2).abs(); + /// let abs_difference = (f.sin().asin() - f).abs(); /// /// assert!(abs_difference <= f128::EPSILON); /// # } @@ -912,10 +912,10 @@ impl f128 { /// # #[cfg(not(miri))] /// # #[cfg(target_has_reliable_f128_math)] { /// - /// let e = std::f128::consts::E; - /// let f = e.tanh().atanh(); + /// let x = std::f128::consts::FRAC_PI_6; + /// let f = x.tanh().atanh(); /// - /// let abs_difference = (f - e).abs(); + /// let abs_difference = (f - x).abs(); /// /// assert!(abs_difference <= 1e-5); /// # } diff --git a/library/std/src/num/f16.rs b/library/std/src/num/f16.rs index 7bdefb05858..5599528717c 100644 --- a/library/std/src/num/f16.rs +++ b/library/std/src/num/f16.rs @@ -432,10 +432,10 @@ impl f16 { /// # #[cfg(not(miri))] /// # #[cfg(target_has_reliable_f16_math)] { /// - /// let f = std::f16::consts::FRAC_PI_2; + /// let f = std::f16::consts::FRAC_PI_4; /// /// // asin(sin(pi/2)) - /// let abs_difference = (f.sin().asin() - std::f16::consts::FRAC_PI_2).abs(); + /// let abs_difference = (f.sin().asin() - f).abs(); /// /// assert!(abs_difference <= f16::EPSILON); /// # } @@ -877,10 +877,10 @@ impl f16 { /// # #[cfg(not(miri))] /// # #[cfg(target_has_reliable_f16_math)] { /// - /// let e = std::f16::consts::E; - /// let f = e.tanh().atanh(); + /// let x = std::f16::consts::FRAC_PI_6; + /// let f = x.tanh().atanh(); /// - /// let abs_difference = (f - e).abs(); + /// let abs_difference = (f - x).abs(); /// /// assert!(abs_difference <= 0.01); /// # } diff --git a/library/std/src/num/f32.rs b/library/std/src/num/f32.rs index 5dee68ad909..0247080a8d6 100644 --- a/library/std/src/num/f32.rs +++ b/library/std/src/num/f32.rs @@ -582,8 +582,8 @@ impl f32 { /// let abs_difference_x = (x.abs_sub(1.0) - 2.0).abs(); /// let abs_difference_y = (y.abs_sub(1.0) - 0.0).abs(); /// - /// assert!(abs_difference_x <= f32::EPSILON); - /// assert!(abs_difference_y <= f32::EPSILON); + /// assert!(abs_difference_x <= 1e-6); + /// assert!(abs_difference_y <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -621,7 +621,7 @@ 
impl f32 { /// // x^(1/3) - 2 == 0 /// let abs_difference = (x.cbrt() - 2.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -652,7 +652,7 @@ impl f32 { /// // sqrt(x^2 + y^2) /// let abs_difference = (x.hypot(y) - (x.powi(2) + y.powi(2)).sqrt()).abs(); /// - /// assert!(abs_difference <= 1e-6); + /// assert!(abs_difference <= 1e-5); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -725,7 +725,7 @@ impl f32 { /// let x = std::f32::consts::FRAC_PI_4; /// let abs_difference = (x.tan() - 1.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -749,12 +749,12 @@ impl f32 { /// # Examples /// /// ``` - /// let f = std::f32::consts::FRAC_PI_2; + /// let f = std::f32::consts::FRAC_PI_4; /// /// // asin(sin(pi/2)) - /// let abs_difference = (f.sin().asin() - std::f32::consts::FRAC_PI_2).abs(); + /// let abs_difference = (f.sin().asin() - f).abs(); /// - /// assert!(abs_difference <= 1e-3); + /// assert!(abs_difference <= 1e-6); /// ``` #[doc(alias = "arcsin")] #[rustc_allow_incoherent_impl] @@ -813,7 +813,7 @@ impl f32 { /// // atan(tan(1)) /// let abs_difference = (f.tan().atan() - 1.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[doc(alias = "arctan")] #[rustc_allow_incoherent_impl] @@ -854,8 +854,8 @@ impl f32 { /// let abs_difference_1 = (y1.atan2(x1) - (-std::f32::consts::FRAC_PI_4)).abs(); /// let abs_difference_2 = (y2.atan2(x2) - (3.0 * std::f32::consts::FRAC_PI_4)).abs(); /// - /// assert!(abs_difference_1 <= f32::EPSILON); - /// assert!(abs_difference_2 <= f32::EPSILON); + /// assert!(abs_difference_1 <= 1e-5); + /// assert!(abs_difference_2 <= 1e-5); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -884,8 +884,8 @@ impl f32 { /// let abs_difference_0 = (f.0 - x.sin()).abs(); /// let abs_difference_1 = (f.1 - x.cos()).abs(); /// - /// assert!(abs_difference_0 <= 1e-6); - /// assert!(abs_difference_1 <= 1e-6); + /// assert!(abs_difference_0 <= 1e-4); + /// assert!(abs_difference_1 <= 1e-4); /// ``` #[doc(alias = "sincos")] #[rustc_allow_incoherent_impl] @@ -982,7 +982,7 @@ impl f32 { /// let g = ((e * e) - 1.0) / (2.0 * e); /// let abs_difference = (f - g).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1012,7 +1012,7 @@ impl f32 { /// let abs_difference = (f - g).abs(); /// /// // Same result - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1042,7 +1042,7 @@ impl f32 { /// let g = (1.0 - e.powi(-2)) / (1.0 + e.powi(-2)); /// let abs_difference = (f - g).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1067,7 +1067,7 @@ impl f32 { /// /// 
let abs_difference = (f - x).abs(); /// - /// assert!(abs_difference <= 1e-7); + /// assert!(abs_difference <= 1e-6); /// ``` #[doc(alias = "arcsinh")] #[rustc_allow_incoherent_impl] @@ -1120,10 +1120,10 @@ impl f32 { /// # Examples /// /// ``` - /// let e = std::f32::consts::E; - /// let f = e.tanh().atanh(); + /// let x = std::f32::consts::FRAC_PI_6; + /// let f = x.tanh().atanh(); /// - /// let abs_difference = (f - e).abs(); + /// let abs_difference = (f - x).abs(); /// /// assert!(abs_difference <= 1e-5); /// ``` @@ -1153,7 +1153,7 @@ impl f32 { /// /// let abs_difference = (x.gamma() - 24.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-5); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1248,7 +1248,7 @@ impl f32 { /// let one = x.erf() + x.erfc(); /// let abs_difference = (one - 1.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] diff --git a/library/std/src/num/f64.rs b/library/std/src/num/f64.rs index 3ec80f68bdb..1cfd3909d96 100644 --- a/library/std/src/num/f64.rs +++ b/library/std/src/num/f64.rs @@ -749,12 +749,12 @@ impl f64 { /// # Examples /// /// ``` - /// let f = std::f64::consts::FRAC_PI_2; + /// let f = std::f64::consts::FRAC_PI_4; /// /// // asin(sin(pi/2)) - /// let abs_difference = (f.sin().asin() - std::f64::consts::FRAC_PI_2).abs(); + /// let abs_difference = (f.sin().asin() - f).abs(); /// - /// assert!(abs_difference < 1e-7); + /// assert!(abs_difference < 1e-14); /// ``` #[doc(alias = "arcsin")] #[rustc_allow_incoherent_impl] @@ -1120,10 +1120,10 @@ impl f64 { /// # Examples /// /// ``` - /// let e = std::f64::consts::E; - /// let f = e.tanh().atanh(); + /// let x = std::f64::consts::FRAC_PI_6; + /// let f = x.tanh().atanh(); /// - /// let abs_difference = (f - e).abs(); + /// let abs_difference = (f - x).abs(); /// /// assert!(abs_difference < 1.0e-10); /// ``` @@ -1153,7 +1153,7 @@ impl f64 { /// /// let abs_difference = (x.gamma() - 24.0).abs(); /// - /// assert!(abs_difference <= f64::EPSILON); + /// assert!(abs_difference <= 1e-10); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1248,7 +1248,7 @@ impl f64 { /// let one = x.erf() + x.erfc(); /// let abs_difference = (one - 1.0).abs(); /// - /// assert!(abs_difference <= f64::EPSILON); + /// assert!(abs_difference <= 1e-10); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] diff --git a/library/std/src/sys/fs/unix.rs b/library/std/src/sys/fs/unix.rs index 3b525406223..dfd6ce56a76 100644 --- a/library/std/src/sys/fs/unix.rs +++ b/library/std/src/sys/fs/unix.rs @@ -1293,6 +1293,15 @@ impl File { return Ok(()); } + #[cfg(target_os = "solaris")] + pub fn lock(&self) -> io::Result<()> { + let mut flock: libc::flock = unsafe { mem::zeroed() }; + flock.l_type = libc::F_WRLCK as libc::c_short; + flock.l_whence = libc::SEEK_SET as libc::c_short; + cvt(unsafe { libc::fcntl(self.as_raw_fd(), libc::F_SETLKW, &flock) })?; + Ok(()) + } + #[cfg(not(any( target_os = "freebsd", target_os = "fuchsia", @@ -1300,6 +1309,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", target_os = "cygwin", + target_os = "solaris", target_vendor = "apple", )))] pub fn lock(&self) -> 
io::Result<()> { @@ -1320,6 +1330,15 @@ impl File { return Ok(()); } + #[cfg(target_os = "solaris")] + pub fn lock_shared(&self) -> io::Result<()> { + let mut flock: libc::flock = unsafe { mem::zeroed() }; + flock.l_type = libc::F_RDLCK as libc::c_short; + flock.l_whence = libc::SEEK_SET as libc::c_short; + cvt(unsafe { libc::fcntl(self.as_raw_fd(), libc::F_SETLKW, &flock) })?; + Ok(()) + } + #[cfg(not(any( target_os = "freebsd", target_os = "fuchsia", @@ -1327,6 +1346,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", target_os = "cygwin", + target_os = "solaris", target_vendor = "apple", )))] pub fn lock_shared(&self) -> io::Result<()> { @@ -1355,6 +1375,23 @@ impl File { } } + #[cfg(target_os = "solaris")] + pub fn try_lock(&self) -> Result<(), TryLockError> { + let mut flock: libc::flock = unsafe { mem::zeroed() }; + flock.l_type = libc::F_WRLCK as libc::c_short; + flock.l_whence = libc::SEEK_SET as libc::c_short; + let result = cvt(unsafe { libc::fcntl(self.as_raw_fd(), libc::F_SETLK, &flock) }); + if let Err(err) = result { + if err.kind() == io::ErrorKind::WouldBlock { + Err(TryLockError::WouldBlock) + } else { + Err(TryLockError::Error(err)) + } + } else { + Ok(()) + } + } + #[cfg(not(any( target_os = "freebsd", target_os = "fuchsia", @@ -1362,6 +1399,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", target_os = "cygwin", + target_os = "solaris", target_vendor = "apple", )))] pub fn try_lock(&self) -> Result<(), TryLockError> { @@ -1393,6 +1431,23 @@ impl File { } } + #[cfg(target_os = "solaris")] + pub fn try_lock_shared(&self) -> Result<(), TryLockError> { + let mut flock: libc::flock = unsafe { mem::zeroed() }; + flock.l_type = libc::F_RDLCK as libc::c_short; + flock.l_whence = libc::SEEK_SET as libc::c_short; + let result = cvt(unsafe { libc::fcntl(self.as_raw_fd(), libc::F_SETLK, &flock) }); + if let Err(err) = result { + if err.kind() == io::ErrorKind::WouldBlock { + Err(TryLockError::WouldBlock) + } else { + Err(TryLockError::Error(err)) + } + } else { + Ok(()) + } + } + #[cfg(not(any( target_os = "freebsd", target_os = "fuchsia", @@ -1400,6 +1455,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", target_os = "cygwin", + target_os = "solaris", target_vendor = "apple", )))] pub fn try_lock_shared(&self) -> Result<(), TryLockError> { @@ -1423,6 +1479,15 @@ impl File { return Ok(()); } + #[cfg(target_os = "solaris")] + pub fn unlock(&self) -> io::Result<()> { + let mut flock: libc::flock = unsafe { mem::zeroed() }; + flock.l_type = libc::F_UNLCK as libc::c_short; + flock.l_whence = libc::SEEK_SET as libc::c_short; + cvt(unsafe { libc::fcntl(self.as_raw_fd(), libc::F_SETLKW, &flock) })?; + Ok(()) + } + #[cfg(not(any( target_os = "freebsd", target_os = "fuchsia", @@ -1430,6 +1495,7 @@ impl File { target_os = "netbsd", target_os = "openbsd", target_os = "cygwin", + target_os = "solaris", target_vendor = "apple", )))] pub fn unlock(&self) -> io::Result<()> { diff --git a/library/std/tests/floats/f32.rs b/library/std/tests/floats/f32.rs index c61a8ec4d20..c29d803b25e 100644 --- a/library/std/tests/floats/f32.rs +++ b/library/std/tests/floats/f32.rs @@ -79,8 +79,8 @@ fn test_log() { let nan: f32 = f32::NAN; let inf: f32 = f32::INFINITY; let neg_inf: f32 = f32::NEG_INFINITY; - assert_approx_eq!(10.0f32.log(10.0), 1.0, APPROX_DELTA); - assert_approx_eq!(2.3f32.log(3.5), 0.664858, APPROX_DELTA); + assert_approx_eq!(10.0f32.log(10.0), 1.0); + assert_approx_eq!(2.3f32.log(3.5), 0.664858); assert_approx_eq!(1.0f32.exp().log(1.0f32.exp()), 1.0, 
APPROX_DELTA); assert!(1.0f32.log(1.0).is_nan()); assert!(1.0f32.log(-13.9).is_nan()); @@ -140,10 +140,10 @@ fn test_asinh() { assert_approx_eq!(2.0f32.asinh(), 1.443635475178810342493276740273105f32); assert_approx_eq!((-2.0f32).asinh(), -1.443635475178810342493276740273105f32); // regression test for the catastrophic cancellation fixed in 72486 - assert_approx_eq!((-3000.0f32).asinh(), -8.699514775987968673236893537700647f32); + assert_approx_eq!((-3000.0f32).asinh(), -8.699514775987968673236893537700647f32, APPROX_DELTA); // test for low accuracy from issue 104548 - assert_approx_eq!(60.0f32, 60.0f32.sinh().asinh()); + assert_approx_eq!(60.0f32, 60.0f32.sinh().asinh(), APPROX_DELTA); // mul needed for approximate comparison to be meaningful assert_approx_eq!(1.0f32, 1e-15f32.sinh().asinh() * 1e15f32); } @@ -196,8 +196,8 @@ fn test_gamma() { assert_approx_eq!(1.0f32.gamma(), 1.0f32); assert_approx_eq!(2.0f32.gamma(), 1.0f32); assert_approx_eq!(3.0f32.gamma(), 2.0f32); - assert_approx_eq!(4.0f32.gamma(), 6.0f32); - assert_approx_eq!(5.0f32.gamma(), 24.0f32); + assert_approx_eq!(4.0f32.gamma(), 6.0f32, APPROX_DELTA); + assert_approx_eq!(5.0f32.gamma(), 24.0f32, APPROX_DELTA); assert_approx_eq!(0.5f32.gamma(), consts::PI.sqrt()); assert_approx_eq!((-0.5f32).gamma(), -2.0 * consts::PI.sqrt()); assert_eq!(0.0f32.gamma(), f32::INFINITY); @@ -218,7 +218,7 @@ fn test_ln_gamma() { assert_eq!(2.0f32.ln_gamma().1, 1); assert_approx_eq!(3.0f32.ln_gamma().0, 2.0f32.ln()); assert_eq!(3.0f32.ln_gamma().1, 1); - assert_approx_eq!((-0.5f32).ln_gamma().0, (2.0 * consts::PI.sqrt()).ln()); + assert_approx_eq!((-0.5f32).ln_gamma().0, (2.0 * consts::PI.sqrt()).ln(), APPROX_DELTA); assert_eq!((-0.5f32).ln_gamma().1, -1); } diff --git a/src/bootstrap/src/core/build_steps/doc.rs b/src/bootstrap/src/core/build_steps/doc.rs index 378c9c19ba5..7865b685659 100644 --- a/src/bootstrap/src/core/build_steps/doc.rs +++ b/src/bootstrap/src/core/build_steps/doc.rs @@ -575,6 +575,31 @@ impl Step for SharedAssets { FileType::Regular, ); + builder.copy_link( + &builder + .src + .join("src") + .join("librustdoc") + .join("html") + .join("static") + .join("images") + .join("favicon.svg"), + &out.join("favicon.svg"), + FileType::Regular, + ); + builder.copy_link( + &builder + .src + .join("src") + .join("librustdoc") + .join("html") + .join("static") + .join("images") + .join("favicon-32x32.png"), + &out.join("favicon-32x32.png"), + FileType::Regular, + ); + SharedAssetsPaths { version_info } } } diff --git a/src/doc/favicon.inc b/src/doc/favicon.inc index 9c330685209..f09498cc095 100644 --- a/src/doc/favicon.inc +++ b/src/doc/favicon.inc @@ -1 +1,2 @@ -<link rel="icon" href="https://www.rust-lang.org/favicon.ico"> +<link rel="alternate icon" type="image/png" href="favicon-32x32.png"> +<link rel="icon" type="image/svg+xml" href="favicon.svg"> diff --git a/src/doc/redirect.inc b/src/doc/redirect.inc index 2fb44be0145..1b7d3744b1f 100644 --- a/src/doc/redirect.inc +++ b/src/doc/redirect.inc @@ -1,2 +1,3 @@ <meta name="robots" content="noindex,follow"> -<link rel="icon" href="https://www.rust-lang.org/favicon.ico"> +<link rel="alternate icon" type="image/png" href="../favicon-32x32.png"> +<link rel="icon" type="image/svg+xml" href="../favicon.svg"> diff --git a/src/tools/miri/src/intrinsics/mod.rs b/src/tools/miri/src/intrinsics/mod.rs index 5e46768b0e6..4628c30e2df 100644 --- a/src/tools/miri/src/intrinsics/mod.rs +++ b/src/tools/miri/src/intrinsics/mod.rs @@ -3,11 +3,8 @@ mod atomic; mod simd; -use std::ops::Neg; - use rand::Rng; 
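The Solaris `lock`/`lock_shared`/`try_lock*`/`unlock` additions to `sys/fs/unix.rs` above use POSIX `fcntl` record locks rather than `flock`. A hedged standalone sketch of that mechanism follows; it needs the `libc` crate, the `set_lock` helper and the `lock-demo.tmp` file name are illustrative, and the error handling is simpler than in the std code:

```
// POSIX record locking via fcntl, the approach the new Solaris code takes.
// A zeroed `flock` with l_start == 0 and l_len == 0 covers the whole file;
// F_SETLKW blocks until the lock is granted, and F_UNLCK releases it.
use std::fs::File;
use std::io;
use std::os::fd::AsRawFd;

fn set_lock(file: &File, lock_type: libc::c_short) -> io::Result<()> {
    let mut fl: libc::flock = unsafe { std::mem::zeroed() };
    fl.l_type = lock_type;
    fl.l_whence = libc::SEEK_SET as libc::c_short;
    let rc = unsafe { libc::fcntl(file.as_raw_fd(), libc::F_SETLKW, &fl) };
    if rc == -1 { Err(io::Error::last_os_error()) } else { Ok(()) }
}

fn main() -> io::Result<()> {
    let file = File::create("lock-demo.tmp")?;
    set_lock(&file, libc::F_WRLCK as libc::c_short)?; // exclusive lock
    println!("holding the lock");
    set_lock(&file, libc::F_UNLCK as libc::c_short)?; // release
    Ok(())
}
```

One caveat of this fallback: `fcntl` record locks are per-process and are released when any descriptor for the file is closed, which is weaker than the `flock` semantics the other targets get.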
use rustc_abi::Size; -use rustc_apfloat::ieee::{IeeeFloat, Semantics}; use rustc_apfloat::{self, Float, Round}; use rustc_middle::mir; use rustc_middle::ty::{self, FloatTy}; @@ -16,7 +13,6 @@ use rustc_span::{Symbol, sym}; use self::atomic::EvalContextExt as _; use self::helpers::{ToHost, ToSoft}; use self::simd::EvalContextExt as _; -use crate::math::{IeeeExt, apply_random_float_error_ulp}; use crate::*; /// Check that the number of args is what we expect. @@ -209,7 +205,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let [f] = check_intrinsic_arg_count(args)?; let f = this.read_scalar(f)?.to_f32()?; - let res = fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| { + let res = math::fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| { // Using host floats (but it's fine, these operations do not have // guaranteed precision). let host = f.to_host(); @@ -227,7 +223,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - let res = apply_random_float_error_ulp( + let res = math::apply_random_float_error_ulp( this, res, 4, @@ -235,7 +231,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Clamp the result to the guaranteed range of this function according to the C standard, // if any. - clamp_float_value(intrinsic_name, res) + math::clamp_float_value(intrinsic_name, res) }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; @@ -253,7 +249,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let [f] = check_intrinsic_arg_count(args)?; let f = this.read_scalar(f)?.to_f64()?; - let res = fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| { + let res = math::fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| { // Using host floats (but it's fine, these operations do not have // guaranteed precision). let host = f.to_host(); @@ -271,7 +267,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - let res = apply_random_float_error_ulp( + let res = math::apply_random_float_error_ulp( this, res, 4, @@ -279,7 +275,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Clamp the result to the guaranteed range of this function according to the C standard, // if any. - clamp_float_value(intrinsic_name, res) + math::clamp_float_value(intrinsic_name, res) }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; @@ -330,14 +326,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let f1 = this.read_scalar(f1)?.to_f32()?; let f2 = this.read_scalar(f2)?.to_f32()?; - let res = fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| { - // Using host floats (but it's fine, this operation does not have guaranteed precision). - let res = f1.to_host().powf(f2.to_host()).to_soft(); + let res = + math::fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| { + // Using host floats (but it's fine, this operation does not have guaranteed precision). + let res = f1.to_host().powf(f2.to_host()).to_soft(); - // Apply a relative error of 4ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. 
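The comments above describe applying "a relative error of 4 ULP" to the host-float result. One way to picture an N-ULP error is stepping the value's bit pattern by at most N; the sketch below does that for finite positive floats only and is not Miri's `apply_random_float_error_ulp` (which also deals with signs, zeros, infinities and NaNs). `offset_ulps` is an illustrative name:

```
// Nudge a finite positive float by `ulps` units in the last place via its bit
// pattern. Edge cases (sign changes, zero, infinity, NaN) are ignored here.
fn offset_ulps(x: f32, ulps: i32) -> f32 {
    debug_assert!(x.is_finite() && x > 0.0);
    let bits = i64::from(x.to_bits()) + i64::from(ulps);
    f32::from_bits(bits as u32)
}

fn main() {
    let exact = 1.0f32;
    let lo = offset_ulps(exact, -4);
    let hi = offset_ulps(exact, 4);
    // Under a 4 ULP error model, any value in [lo, hi] is an acceptable result.
    assert!(lo < exact && exact < hi);
    println!("{lo:.9} ..= {hi:.9}");
}
```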
- apply_random_float_error_ulp(this, res, 4) - }); + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + math::apply_random_float_error_ulp(this, res, 4) + }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; } @@ -346,14 +343,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let f1 = this.read_scalar(f1)?.to_f64()?; let f2 = this.read_scalar(f2)?.to_f64()?; - let res = fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| { - // Using host floats (but it's fine, this operation does not have guaranteed precision). - let res = f1.to_host().powf(f2.to_host()).to_soft(); + let res = + math::fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| { + // Using host floats (but it's fine, this operation does not have guaranteed precision). + let res = f1.to_host().powf(f2.to_host()).to_soft(); - // Apply a relative error of 4ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp(this, res, 4) - }); + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + math::apply_random_float_error_ulp(this, res, 4) + }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; } @@ -363,13 +361,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let f = this.read_scalar(f)?.to_f32()?; let i = this.read_scalar(i)?.to_i32()?; - let res = fixed_powi_float_value(this, f, i).unwrap_or_else(|| { + let res = math::fixed_powi_value(this, f, i).unwrap_or_else(|| { // Using host floats (but it's fine, this operation does not have guaranteed precision). let res = f.to_host().powi(i).to_soft(); // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp(this, res, 4) + math::apply_random_float_error_ulp(this, res, 4) }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; @@ -379,13 +377,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let f = this.read_scalar(f)?.to_f64()?; let i = this.read_scalar(i)?.to_i32()?; - let res = fixed_powi_float_value(this, f, i).unwrap_or_else(|| { + let res = math::fixed_powi_value(this, f, i).unwrap_or_else(|| { // Using host floats (but it's fine, this operation does not have guaranteed precision). let res = f.to_host().powi(i).to_soft(); // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp(this, res, 4) + math::apply_random_float_error_ulp(this, res, 4) }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; @@ -440,7 +438,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } // Apply a relative error of 4ULP to simulate non-deterministic precision loss // due to optimizations. 
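The "clamp the result to the guaranteed range" step mentioned in the comments above keeps the injected error from producing mathematically impossible outputs: sin and cos must stay within [-1, 1], exp must stay non-negative, and so on. A small illustration of the idea, separate from Miri's actual `clamp_float_value` (the helper names here are made up):

```
// Whatever imprecision gets injected, a sin/cos-style result must remain in
// [-1, 1] and an exp-style result must remain non-negative.
fn clamp_sin_cos(x: f64) -> f64 {
    x.clamp(-1.0, 1.0)
}

fn clamp_exp(x: f64) -> f64 {
    x.max(0.0)
}

fn main() {
    // Values perturbed just past the legal range are pulled back onto it.
    assert_eq!(clamp_sin_cos(1.0000000001), 1.0);
    assert_eq!(clamp_sin_cos(-1.5), -1.0);
    assert_eq!(clamp_exp(-1e-300), 0.0);
}
```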
- let res = crate::math::apply_random_float_error_to_imm(this, res, 4)?; + let res = math::apply_random_float_error_to_imm(this, res, 4)?; this.write_immediate(*res, dest)?; } @@ -477,108 +475,3 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(EmulateItemResult::NeedsReturn) } } - -/// For the intrinsics: -/// - sinf32, sinf64 -/// - cosf32, cosf64 -/// - expf32, expf64, exp2f32, exp2f64 -/// - logf32, logf64, log2f32, log2f64, log10f32, log10f64 -/// - powf32, powf64 -/// -/// # Return -/// -/// Returns `Some(output)` if the `intrinsic` results in a defined fixed `output` specified in the C standard -/// (specifically, C23 annex F.10) when given `args` as arguments. Outputs that are unaffected by a relative error -/// (such as INF and zero) are not handled here, they are assumed to be handled by the underlying -/// implementation. Returns `None` if no specific value is guaranteed. -/// -/// # Note -/// -/// For `powf*` operations of the form: -/// -/// - `(SNaN)^(±0)` -/// - `1^(SNaN)` -/// -/// The result is implementation-defined: -/// - musl returns for both `1.0` -/// - glibc returns for both `NaN` -/// -/// This discrepancy exists because SNaN handling is not consistently defined across platforms, -/// and the C standard leaves behavior for SNaNs unspecified. -/// -/// Miri chooses to adhere to both implementations and returns either one of them non-deterministically. -fn fixed_float_value<S: Semantics>( - ecx: &mut MiriInterpCx<'_>, - intrinsic_name: &str, - args: &[IeeeFloat<S>], -) -> Option<IeeeFloat<S>> { - let one = IeeeFloat::<S>::one(); - Some(match (intrinsic_name, args) { - // cos(+- 0) = 1 - ("cosf32" | "cosf64", [input]) if input.is_zero() => one, - - // e^0 = 1 - ("expf32" | "expf64" | "exp2f32" | "exp2f64", [input]) if input.is_zero() => one, - - // (-1)^(±INF) = 1 - ("powf32" | "powf64", [base, exp]) if *base == -one && exp.is_infinite() => one, - - // 1^y = 1 for any y, even a NaN - ("powf32" | "powf64", [base, exp]) if *base == one => { - let rng = ecx.machine.rng.get_mut(); - // SNaN exponents get special treatment: they might return 1, or a NaN. - let return_nan = exp.is_signaling() && ecx.machine.float_nondet && rng.random(); - // Handle both the musl and glibc cases non-deterministically. - if return_nan { ecx.generate_nan(args) } else { one } - } - - // x^(±0) = 1 for any x, even a NaN - ("powf32" | "powf64", [base, exp]) if exp.is_zero() => { - let rng = ecx.machine.rng.get_mut(); - // SNaN bases get special treatment: they might return 1, or a NaN. - let return_nan = base.is_signaling() && ecx.machine.float_nondet && rng.random(); - // Handle both the musl and glibc cases non-deterministically. - if return_nan { ecx.generate_nan(args) } else { one } - } - - // There are a lot of cases for fixed outputs according to the C Standard, but these are - // mainly INF or zero which are not affected by the applied error. - _ => return None, - }) -} - -/// Returns `Some(output)` if `powi` (called `pown` in C) results in a fixed value specified in the -/// C standard (specifically, C23 annex F.10.4.6) when doing `base^exp`. Otherwise, returns `None`. -fn fixed_powi_float_value<S: Semantics>( - ecx: &mut MiriInterpCx<'_>, - base: IeeeFloat<S>, - exp: i32, -) -> Option<IeeeFloat<S>> { - Some(match exp { - 0 => { - let one = IeeeFloat::<S>::one(); - let rng = ecx.machine.rng.get_mut(); - let return_nan = ecx.machine.float_nondet && rng.random() && base.is_signaling(); - // For SNaN treatment, we are consistent with `powf`above. 
- // (We wouldn't have two, unlike powf all implementations seem to agree for powi, - // but for now we are maximally conservative.) - if return_nan { ecx.generate_nan(&[base]) } else { one } - } - - _ => return None, - }) -} - -/// Given an floating-point operation and a floating-point value, clamps the result to the output -/// range of the given operation. -fn clamp_float_value<S: Semantics>(intrinsic_name: &str, val: IeeeFloat<S>) -> IeeeFloat<S> { - match intrinsic_name { - // sin and cos: [-1, 1] - "sinf32" | "cosf32" | "sinf64" | "cosf64" => - val.clamp(IeeeFloat::<S>::one().neg(), IeeeFloat::<S>::one()), - // exp: [0, +INF] - "expf32" | "exp2f32" | "expf64" | "exp2f64" => - IeeeFloat::<S>::maximum(val, IeeeFloat::<S>::ZERO), - _ => val, - } -} diff --git a/src/tools/miri/src/math.rs b/src/tools/miri/src/math.rs index 6427f3ca6e9..7d2f1c08368 100644 --- a/src/tools/miri/src/math.rs +++ b/src/tools/miri/src/math.rs @@ -1,6 +1,9 @@ +use std::ops::Neg; +use std::{f32, f64}; + use rand::Rng as _; use rustc_apfloat::Float as _; -use rustc_apfloat::ieee::IeeeFloat; +use rustc_apfloat::ieee::{DoubleS, IeeeFloat, Semantics, SingleS}; use rustc_middle::ty::{self, FloatTy, ScalarInt}; use crate::*; @@ -105,6 +108,210 @@ pub(crate) fn apply_random_float_error_to_imm<'tcx>( interp_ok(ImmTy::from_scalar_int(res, val.layout)) } +/// Given a floating-point operation and a floating-point value, clamps the result to the output +/// range of the given operation according to the C standard, if any. +pub(crate) fn clamp_float_value<S: Semantics>( + intrinsic_name: &str, + val: IeeeFloat<S>, +) -> IeeeFloat<S> +where + IeeeFloat<S>: IeeeExt, +{ + let zero = IeeeFloat::<S>::ZERO; + let one = IeeeFloat::<S>::one(); + let two = IeeeFloat::<S>::two(); + let pi = IeeeFloat::<S>::pi(); + let pi_over_2 = (pi / two).value; + + match intrinsic_name { + // sin, cos, tanh: [-1, 1] + #[rustfmt::skip] + | "sinf32" + | "sinf64" + | "cosf32" + | "cosf64" + | "tanhf" + | "tanh" + => val.clamp(one.neg(), one), + + // exp: [0, +INF) + "expf32" | "exp2f32" | "expf64" | "exp2f64" => val.maximum(zero), + + // cosh: [1, +INF) + "coshf" | "cosh" => val.maximum(one), + + // acos: [0, π] + "acosf" | "acos" => val.clamp(zero, pi), + + // asin: [-π, +π] + "asinf" | "asin" => val.clamp(pi.neg(), pi), + + // atan: (-π/2, +π/2) + "atanf" | "atan" => val.clamp(pi_over_2.neg(), pi_over_2), + + // erfc: (-1, 1) + "erff" | "erf" => val.clamp(one.neg(), one), + + // erfc: (0, 2) + "erfcf" | "erfc" => val.clamp(zero, two), + + // atan2(y, x): arctan(y/x) in [−π, +π] + "atan2f" | "atan2" => val.clamp(pi.neg(), pi), + + _ => val, + } +} + +/// For the intrinsics: +/// - sinf32, sinf64, sinhf, sinh +/// - cosf32, cosf64, coshf, cosh +/// - tanhf, tanh, atanf, atan, atan2f, atan2 +/// - expf32, expf64, exp2f32, exp2f64 +/// - logf32, logf64, log2f32, log2f64, log10f32, log10f64 +/// - powf32, powf64 +/// - erff, erf, erfcf, erfc +/// - hypotf, hypot +/// +/// # Return +/// +/// Returns `Some(output)` if the `intrinsic` results in a defined fixed `output` specified in the C standard +/// (specifically, C23 annex F.10) when given `args` as arguments. Outputs that are unaffected by a relative error +/// (such as INF and zero) are not handled here, they are assumed to be handled by the underlying +/// implementation. Returns `None` if no specific value is guaranteed. 
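`fixed_float_value`, documented above, returns the outputs that C23 Annex F pins down exactly for these functions. A few of those identities can be seen through the corresponding stable std methods, which follow the same C semantics on common platforms; this is an illustration of the guarantees being encoded, not a Miri test:

```
// A handful of the exact results C23 Annex F guarantees, observed through std.
fn main() {
    use std::f64::consts::{FRAC_PI_2, PI};

    assert_eq!(0.0f64.cosh(), 1.0); // cosh(±0) = 1
    assert_eq!(f64::INFINITY.tanh(), 1.0); // tanh(+∞) = 1
    assert_eq!(f64::NEG_INFINITY.tanh(), -1.0); // tanh(−∞) = −1
    assert_eq!(f64::INFINITY.atan(), FRAC_PI_2); // atan(+∞) = π/2
    assert_eq!(0.0f64.atan2(-0.0), PI); // atan2(+0, −0) = +π
    assert_eq!(3.0f64.hypot(0.0), 3.0); // hypot(x, ±0) = |x|
    println!("all identities held");
}
```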
+/// +/// # Note +/// +/// For `powf*` operations of the form: +/// +/// - `(SNaN)^(±0)` +/// - `1^(SNaN)` +/// +/// The result is implementation-defined: +/// - musl returns for both `1.0` +/// - glibc returns for both `NaN` +/// +/// This discrepancy exists because SNaN handling is not consistently defined across platforms, +/// and the C standard leaves behavior for SNaNs unspecified. +/// +/// Miri chooses to adhere to both implementations and returns either one of them non-deterministically. +pub(crate) fn fixed_float_value<S: Semantics>( + ecx: &mut MiriInterpCx<'_>, + intrinsic_name: &str, + args: &[IeeeFloat<S>], +) -> Option<IeeeFloat<S>> +where + IeeeFloat<S>: IeeeExt, +{ + let this = ecx.eval_context_mut(); + let one = IeeeFloat::<S>::one(); + let two = IeeeFloat::<S>::two(); + let three = IeeeFloat::<S>::three(); + let pi = IeeeFloat::<S>::pi(); + let pi_over_2 = (pi / two).value; + let pi_over_4 = (pi_over_2 / two).value; + + Some(match (intrinsic_name, args) { + // cos(±0) and cosh(±0)= 1 + ("cosf32" | "cosf64" | "coshf" | "cosh", [input]) if input.is_zero() => one, + + // e^0 = 1 + ("expf32" | "expf64" | "exp2f32" | "exp2f64", [input]) if input.is_zero() => one, + + // tanh(±INF) = ±1 + ("tanhf" | "tanh", [input]) if input.is_infinite() => one.copy_sign(*input), + + // atan(±INF) = ±π/2 + ("atanf" | "atan", [input]) if input.is_infinite() => pi_over_2.copy_sign(*input), + + // erf(±INF) = ±1 + ("erff" | "erf", [input]) if input.is_infinite() => one.copy_sign(*input), + + // erfc(-INF) = 2 + ("erfcf" | "erfc", [input]) if input.is_neg_infinity() => (one + one).value, + + // hypot(x, ±0) = abs(x), if x is not a NaN. + ("_hypotf" | "hypotf" | "_hypot" | "hypot", [x, y]) if !x.is_nan() && y.is_zero() => + x.abs(), + + // atan2(±0,−0) = ±π. + // atan2(±0, y) = ±π for y < 0. + // Must check for non NaN because `y.is_negative()` also applies to NaN. + ("atan2f" | "atan2", [x, y]) if (x.is_zero() && (y.is_negative() && !y.is_nan())) => + pi.copy_sign(*x), + + // atan2(±x,−∞) = ±π for finite x > 0. + ("atan2f" | "atan2", [x, y]) + if (!x.is_zero() && !x.is_infinite()) && y.is_neg_infinity() => + pi.copy_sign(*x), + + // atan2(x, ±0) = −π/2 for x < 0. + // atan2(x, ±0) = π/2 for x > 0. + ("atan2f" | "atan2", [x, y]) if !x.is_zero() && y.is_zero() => pi_over_2.copy_sign(*x), + + //atan2(±∞, −∞) = ±3π/4 + ("atan2f" | "atan2", [x, y]) if x.is_infinite() && y.is_neg_infinity() => + (pi_over_4 * three).value.copy_sign(*x), + + //atan2(±∞, +∞) = ±π/4 + ("atan2f" | "atan2", [x, y]) if x.is_infinite() && y.is_pos_infinity() => + pi_over_4.copy_sign(*x), + + // atan2(±∞, y) returns ±π/2 for finite y. + ("atan2f" | "atan2", [x, y]) if x.is_infinite() && (!y.is_infinite() && !y.is_nan()) => + pi_over_2.copy_sign(*x), + + // (-1)^(±INF) = 1 + ("powf32" | "powf64", [base, exp]) if *base == -one && exp.is_infinite() => one, + + // 1^y = 1 for any y, even a NaN + ("powf32" | "powf64", [base, exp]) if *base == one => { + let rng = this.machine.rng.get_mut(); + // SNaN exponents get special treatment: they might return 1, or a NaN. + let return_nan = exp.is_signaling() && this.machine.float_nondet && rng.random(); + // Handle both the musl and glibc cases non-deterministically. + if return_nan { this.generate_nan(args) } else { one } + } + + // x^(±0) = 1 for any x, even a NaN + ("powf32" | "powf64", [base, exp]) if exp.is_zero() => { + let rng = this.machine.rng.get_mut(); + // SNaN bases get special treatment: they might return 1, or a NaN. 
+ let return_nan = base.is_signaling() && this.machine.float_nondet && rng.random(); + // Handle both the musl and glibc cases non-deterministically. + if return_nan { this.generate_nan(args) } else { one } + } + + // There are a lot of cases for fixed outputs according to the C Standard, but these are + // mainly INF or zero which are not affected by the applied error. + _ => return None, + }) +} + +/// Returns `Some(output)` if `powi` (called `pown` in C) results in a fixed value specified in the +/// C standard (specifically, C23 annex F.10.4.6) when doing `base^exp`. Otherwise, returns `None`. +pub(crate) fn fixed_powi_value<S: Semantics>( + ecx: &mut MiriInterpCx<'_>, + base: IeeeFloat<S>, + exp: i32, +) -> Option<IeeeFloat<S>> +where + IeeeFloat<S>: IeeeExt, +{ + match exp { + 0 => { + let one = IeeeFloat::<S>::one(); + let rng = ecx.machine.rng.get_mut(); + let return_nan = ecx.machine.float_nondet && rng.random() && base.is_signaling(); + // For SNaN treatment, we are consistent with `powf`above. + // (We wouldn't have two, unlike powf all implementations seem to agree for powi, + // but for now we are maximally conservative.) + Some(if return_nan { ecx.generate_nan(&[base]) } else { one }) + } + + _ => return None, + } +} + pub(crate) fn sqrt<S: rustc_apfloat::ieee::Semantics>(x: IeeeFloat<S>) -> IeeeFloat<S> { match x.category() { // preserve zero sign @@ -187,19 +394,47 @@ pub(crate) fn sqrt<S: rustc_apfloat::ieee::Semantics>(x: IeeeFloat<S>) -> IeeeFl } } -/// Extend functionality of rustc_apfloat softfloats +/// Extend functionality of `rustc_apfloat` softfloats for IEEE float types. pub trait IeeeExt: rustc_apfloat::Float { + // Some values we use: + #[inline] fn one() -> Self { Self::from_u128(1).value } #[inline] + fn two() -> Self { + Self::from_u128(2).value + } + + #[inline] + fn three() -> Self { + Self::from_u128(3).value + } + + fn pi() -> Self; + + #[inline] fn clamp(self, min: Self, max: Self) -> Self { self.maximum(min).minimum(max) } } -impl<S: rustc_apfloat::ieee::Semantics> IeeeExt for IeeeFloat<S> {} + +macro_rules! impl_ieee_pi { + ($float_ty:ident, $semantic:ty) => { + impl IeeeExt for IeeeFloat<$semantic> { + #[inline] + fn pi() -> Self { + // We take the value from the standard library as the most reasonable source for an exact π here. + Self::from_bits($float_ty::consts::PI.to_bits().into()) + } + } + }; +} + +impl_ieee_pi!(f32, SingleS); +impl_ieee_pi!(f64, DoubleS); #[cfg(test)] mod tests { diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs index a700644b95d..187423472ab 100644 --- a/src/tools/miri/src/shims/foreign_items.rs +++ b/src/tools/miri/src/shims/foreign_items.rs @@ -18,6 +18,7 @@ use rustc_target::callconv::FnAbi; use self::helpers::{ToHost, ToSoft}; use super::alloc::EvalContextExt as _; use super::backtrace::EvalContextExt as _; +use crate::helpers::EvalContextExt as _; use crate::*; /// Type of dynamic symbols (for `dlsym` et al) @@ -826,33 +827,36 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { => { let [f] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?; let f = this.read_scalar(f)?.to_f32()?; - // Using host floats (but it's fine, these operations do not have guaranteed precision). 
- let f_host = f.to_host(); - let res = match link_name.as_str() { - "cbrtf" => f_host.cbrt(), - "coshf" => f_host.cosh(), - "sinhf" => f_host.sinh(), - "tanf" => f_host.tan(), - "tanhf" => f_host.tanh(), - "acosf" => f_host.acos(), - "asinf" => f_host.asin(), - "atanf" => f_host.atan(), - "log1pf" => f_host.ln_1p(), - "expm1f" => f_host.exp_m1(), - "tgammaf" => f_host.gamma(), - "erff" => f_host.erf(), - "erfcf" => f_host.erfc(), - _ => bug!(), - }; - let res = res.to_soft(); - // Apply a relative error of 16ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp( - // this, - // res, - // 4, // log2(16) - // ); + + let res = math::fixed_float_value(this, link_name.as_str(), &[f]).unwrap_or_else(|| { + // Using host floats (but it's fine, these operations do not have + // guaranteed precision). + let f_host = f.to_host(); + let res = match link_name.as_str() { + "cbrtf" => f_host.cbrt(), + "coshf" => f_host.cosh(), + "sinhf" => f_host.sinh(), + "tanf" => f_host.tan(), + "tanhf" => f_host.tanh(), + "acosf" => f_host.acos(), + "asinf" => f_host.asin(), + "atanf" => f_host.atan(), + "log1pf" => f_host.ln_1p(), + "expm1f" => f_host.exp_m1(), + "tgammaf" => f_host.gamma(), + "erff" => f_host.erf(), + "erfcf" => f_host.erfc(), + _ => bug!(), + }; + let res = res.to_soft(); + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + let res = math::apply_random_float_error_ulp(this, res, 4); + + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + math::clamp_float_value(link_name.as_str(), res) + }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; } @@ -865,24 +869,27 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { let [f1, f2] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?; let f1 = this.read_scalar(f1)?.to_f32()?; let f2 = this.read_scalar(f2)?.to_f32()?; - // underscore case for windows, here and below - // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019) - // Using host floats (but it's fine, these operations do not have guaranteed precision). - let res = match link_name.as_str() { - "_hypotf" | "hypotf" => f1.to_host().hypot(f2.to_host()).to_soft(), - "atan2f" => f1.to_host().atan2(f2.to_host()).to_soft(), - #[allow(deprecated)] - "fdimf" => f1.to_host().abs_sub(f2.to_host()).to_soft(), - _ => bug!(), - }; - // Apply a relative error of 16ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp( - // this, - // res, - // 4, // log2(16) - // ); + + let res = math::fixed_float_value(this, link_name.as_str(), &[f1, f2]) + .unwrap_or_else(|| { + let res = match link_name.as_str() { + // underscore case for windows, here and below + // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019) + // Using host floats (but it's fine, these operations do not have guaranteed precision). 
+ "_hypotf" | "hypotf" => f1.to_host().hypot(f2.to_host()).to_soft(), + "atan2f" => f1.to_host().atan2(f2.to_host()).to_soft(), + #[allow(deprecated)] + "fdimf" => f1.to_host().abs_sub(f2.to_host()).to_soft(), + _ => bug!(), + }; + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + let res = math::apply_random_float_error_ulp(this, res, 4); + + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + math::clamp_float_value(link_name.as_str(), res) + }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; } @@ -903,33 +910,36 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { => { let [f] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?; let f = this.read_scalar(f)?.to_f64()?; - // Using host floats (but it's fine, these operations do not have guaranteed precision). - let f_host = f.to_host(); - let res = match link_name.as_str() { - "cbrt" => f_host.cbrt(), - "cosh" => f_host.cosh(), - "sinh" => f_host.sinh(), - "tan" => f_host.tan(), - "tanh" => f_host.tanh(), - "acos" => f_host.acos(), - "asin" => f_host.asin(), - "atan" => f_host.atan(), - "log1p" => f_host.ln_1p(), - "expm1" => f_host.exp_m1(), - "tgamma" => f_host.gamma(), - "erf" => f_host.erf(), - "erfc" => f_host.erfc(), - _ => bug!(), - }; - let res = res.to_soft(); - // Apply a relative error of 16ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp( - // this, - // res.to_soft(), - // 4, // log2(16) - // ); + + let res = math::fixed_float_value(this, link_name.as_str(), &[f]).unwrap_or_else(|| { + // Using host floats (but it's fine, these operations do not have + // guaranteed precision). + let f_host = f.to_host(); + let res = match link_name.as_str() { + "cbrt" => f_host.cbrt(), + "cosh" => f_host.cosh(), + "sinh" => f_host.sinh(), + "tan" => f_host.tan(), + "tanh" => f_host.tanh(), + "acos" => f_host.acos(), + "asin" => f_host.asin(), + "atan" => f_host.atan(), + "log1p" => f_host.ln_1p(), + "expm1" => f_host.exp_m1(), + "tgamma" => f_host.gamma(), + "erf" => f_host.erf(), + "erfc" => f_host.erfc(), + _ => bug!(), + }; + let res = res.to_soft(); + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + let res = math::apply_random_float_error_ulp(this, res, 4); + + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + math::clamp_float_value(link_name.as_str(), res) + }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; } @@ -942,24 +952,26 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { let [f1, f2] = this.check_shim_sig_lenient(abi, CanonAbi::C , link_name, args)?; let f1 = this.read_scalar(f1)?.to_f64()?; let f2 = this.read_scalar(f2)?.to_f64()?; - // underscore case for windows, here and below - // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019) - // Using host floats (but it's fine, these operations do not have guaranteed precision). 
- let res = match link_name.as_str() { - "_hypot" | "hypot" => f1.to_host().hypot(f2.to_host()).to_soft(), - "atan2" => f1.to_host().atan2(f2.to_host()).to_soft(), - #[allow(deprecated)] - "fdim" => f1.to_host().abs_sub(f2.to_host()).to_soft(), - _ => bug!(), - }; - // Apply a relative error of 16ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp( - // this, - // res, - // 4, // log2(16) - // ); + + let res = math::fixed_float_value(this, link_name.as_str(), &[f1, f2]).unwrap_or_else(|| { + let res = match link_name.as_str() { + // underscore case for windows, here and below + // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019) + // Using host floats (but it's fine, these operations do not have guaranteed precision). + "_hypot" | "hypot" => f1.to_host().hypot(f2.to_host()).to_soft(), + "atan2" => f1.to_host().atan2(f2.to_host()).to_soft(), + #[allow(deprecated)] + "fdim" => f1.to_host().abs_sub(f2.to_host()).to_soft(), + _ => bug!(), + }; + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + let res = math::apply_random_float_error_ulp(this, res, 4); + + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + math::clamp_float_value(link_name.as_str(), res) + }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; } @@ -985,11 +997,14 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // Using host floats (but it's fine, these operations do not have guaranteed precision). let (res, sign) = x.to_host().ln_gamma(); this.write_int(sign, &signp)?; + let res = res.to_soft(); - // Apply a relative error of 16ULP to introduce some non-determinism + // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp(this, res, 4 /* log2(16) */); + let res = math::apply_random_float_error_ulp(this, res, 4); + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + let res = math::clamp_float_value(link_name.as_str(), res); let res = this.adjust_nan(res, &[x]); this.write_scalar(res, dest)?; } @@ -1001,11 +1016,14 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // Using host floats (but it's fine, these operations do not have guaranteed precision). let (res, sign) = x.to_host().ln_gamma(); this.write_int(sign, &signp)?; + let res = res.to_soft(); - // Apply a relative error of 16ULP to introduce some non-determinism + // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp(this, res, 4 /* log2(16) */); + let res = math::apply_random_float_error_ulp(this, res, 4); + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. 
+ let res = math::clamp_float_value(link_name.as_str(), res); let res = this.adjust_nan(res, &[x]); this.write_scalar(res, dest)?; } diff --git a/src/tools/miri/tests/pass/float.rs b/src/tools/miri/tests/pass/float.rs index 1b1163c7797..9f1b3f612b2 100644 --- a/src/tools/miri/tests/pass/float.rs +++ b/src/tools/miri/tests/pass/float.rs @@ -1093,6 +1093,8 @@ pub fn libm() { assert_approx_eq!(1f32.exp_m1(), f32::consts::E - 1.0); assert_approx_eq!(1f64.exp_m1(), f64::consts::E - 1.0); + assert_approx_eq!(f32::NEG_INFINITY.exp_m1(), -1.0); + assert_approx_eq!(f64::NEG_INFINITY.exp_m1(), -1.0); assert_approx_eq!(10f32.exp2(), 1024f32); assert_approx_eq!(50f64.exp2(), 1125899906842624f64); @@ -1128,6 +1130,7 @@ pub fn libm() { assert_eq!(ldexp(0.65f64, 3i32), 5.2f64); assert_eq!(ldexp(1.42, 0xFFFF), f64::INFINITY); assert_eq!(ldexp(1.42, -0xFFFF), 0f64); + assert_eq!(ldexp(42.0, 0), 42.0); // Trigonometric functions. @@ -1136,8 +1139,13 @@ pub fn libm() { assert_approx_eq!((f64::consts::PI / 2f64).sin(), 1f64); assert_approx_eq!(f32::consts::FRAC_PI_6.sin(), 0.5); assert_approx_eq!(f64::consts::FRAC_PI_6.sin(), 0.5); - assert_approx_eq!(f32::consts::FRAC_PI_4.sin().asin(), f32::consts::FRAC_PI_4); - assert_approx_eq!(f64::consts::FRAC_PI_4.sin().asin(), f64::consts::FRAC_PI_4); + // Increase error tolerance to 16ULP because of the extra operation. + assert_approx_eq!(f32::consts::FRAC_PI_4.sin().asin(), f32::consts::FRAC_PI_4, 16); + assert_approx_eq!(f64::consts::FRAC_PI_4.sin().asin(), f64::consts::FRAC_PI_4, 16); + assert_biteq(0.0f32.asin(), 0.0f32, "asin(+0) = +0"); + assert_biteq((-0.0f32).asin(), -0.0, "asin(-0) = -0"); + assert_biteq(0.0f64.asin(), 0.0, "asin(+0) = +0"); + assert_biteq((-0.0f64).asin(), -0.0, "asin(-0) = -0"); assert_approx_eq!(1.0f32.sinh(), 1.1752012f32); assert_approx_eq!(1.0f64.sinh(), 1.1752011936438014f64); @@ -1164,11 +1172,18 @@ pub fn libm() { assert_approx_eq!((f64::consts::PI * 2f64).cos(), 1f64); assert_approx_eq!(f32::consts::FRAC_PI_3.cos(), 0.5); assert_approx_eq!(f64::consts::FRAC_PI_3.cos(), 0.5); - assert_approx_eq!(f32::consts::FRAC_PI_4.cos().acos(), f32::consts::FRAC_PI_4); - assert_approx_eq!(f64::consts::FRAC_PI_4.cos().acos(), f64::consts::FRAC_PI_4); + // Increase error tolerance to 16ULP because of the extra operation. + assert_approx_eq!(f32::consts::FRAC_PI_4.cos().acos(), f32::consts::FRAC_PI_4, 16); + assert_approx_eq!(f64::consts::FRAC_PI_4.cos().acos(), f64::consts::FRAC_PI_4, 16); + assert_biteq(1.0f32.acos(), 0.0, "acos(1) = 0"); + assert_biteq(1.0f64.acos(), 0.0, "acos(1) = 0"); - assert_approx_eq!(1.0f32.cosh(), 1.54308f32); + assert_approx_eq!(1.0f32.cosh(), 1.5430806f32); assert_approx_eq!(1.0f64.cosh(), 1.5430806348152437f64); + assert_eq!(0.0f32.cosh(), 1.0); + assert_eq!(0.0f64.cosh(), 1.0); + assert_eq!((-0.0f32).cosh(), 1.0); + assert_eq!((-0.0f64).cosh(), 1.0); assert_approx_eq!(2.0f32.acosh(), 1.31695789692481670862504634730796844f32); assert_approx_eq!(3.0f64.acosh(), 1.76274717403908605046521864995958461f64); @@ -1178,6 +1193,47 @@ pub fn libm() { assert_approx_eq!(1.0_f64, 1.0_f64.tan().atan()); assert_approx_eq!(1.0f32.atan2(2.0f32), 0.46364761f32); assert_approx_eq!(1.0f32.atan2(2.0f32), 0.46364761f32); + // C standard defines a bunch of fixed outputs for atan2 + macro_rules! fixed_atan2_cases{ + ($float_type:ident) => {{ + use std::$float_type::consts::{PI, FRAC_PI_2, FRAC_PI_4}; + use $float_type::{INFINITY, NEG_INFINITY}; + + // atan2(±0,−0) = ±π. 
+ assert_eq!($float_type::atan2(0.0, -0.0), PI, "atan2(0,−0) = π"); + assert_eq!($float_type::atan2(-0.0, -0.0), -PI, "atan2(-0,−0) = -π"); + + // atan2(±0, y) = ±π for y < 0. + assert_eq!($float_type::atan2(0.0, -1.0), PI, "atan2(0, y) = π for y < 0."); + assert_eq!($float_type::atan2(-0.0, -1.0), -PI, "atan2(-0, y) = -π for y < 0."); + + // atan2(x, ±0) = −π/2 for x < 0. + assert_eq!($float_type::atan2(-1.0, 0.0), -FRAC_PI_2, "atan2(x, 0) = −π/2 for x < 0"); + assert_eq!($float_type::atan2(-1.0, -0.0), -FRAC_PI_2, "atan2(x, -0) = −π/2 for x < 0"); + + // atan2(x, ±0) = π/2 for x > 0. + assert_eq!($float_type::atan2(1.0, 0.0), FRAC_PI_2, "atan2(x, 0) = π/2 for x > 0."); + assert_eq!($float_type::atan2(1.0, -0.0), FRAC_PI_2, "atan2(x, -0) = π/2 for x > 0."); + + // atan2(±x,−∞) = ±π for finite x > 0. + assert_eq!($float_type::atan2(1.0, NEG_INFINITY), PI, "atan2(x, −∞) = π for finite x > 0"); + assert_eq!($float_type::atan2(-1.0, NEG_INFINITY), -PI, "atan2(-x, −∞) = -π for finite x > 0"); + + // atan2(±∞, y) returns ±π/2 for finite y. + assert_eq!($float_type::atan2(INFINITY, 1.0), FRAC_PI_2, "atan2(+∞, y) returns π/2 for finite y"); + assert_eq!($float_type::atan2(NEG_INFINITY, 1.0), -FRAC_PI_2, "atan2(-∞, y) returns -π/2 for finite y"); + + // atan2(±∞, −∞) = ±3π/4 + assert_eq!($float_type::atan2(INFINITY, NEG_INFINITY), 3.0 * FRAC_PI_4, "atan2(+∞, −∞) = 3π/4"); + assert_eq!($float_type::atan2(NEG_INFINITY, NEG_INFINITY), -3.0 * FRAC_PI_4, "atan2(-∞, −∞) = -3π/4"); + + // atan2(±∞, +∞) = ±π/4 + assert_eq!($float_type::atan2(INFINITY, INFINITY), FRAC_PI_4, "atan2(+∞, +∞) = π/4"); + assert_eq!($float_type::atan2(NEG_INFINITY, INFINITY), -FRAC_PI_4, "atan2(-∞, +∞) = -π/4"); + }} + } + fixed_atan2_cases!(f32); + fixed_atan2_cases!(f64); assert_approx_eq!( 1.0f32.tanh(), @@ -1187,6 +1243,11 @@ pub fn libm() { 1.0f64.tanh(), (1.0 - f64::consts::E.powi(-2)) / (1.0 + f64::consts::E.powi(-2)) ); + assert_eq!(f32::INFINITY.tanh(), 1.0); + assert_eq!(f32::NEG_INFINITY.tanh(), -1.0); + assert_eq!(f64::INFINITY.tanh(), 1.0); + assert_eq!(f64::NEG_INFINITY.tanh(), -1.0); + assert_approx_eq!(0.5f32.atanh(), 0.54930614433405484569762261846126285f32); assert_approx_eq!(0.5f64.atanh(), 0.54930614433405484569762261846126285f64); @@ -1207,8 +1268,14 @@ pub fn libm() { assert_approx_eq!(1.0f32.erf(), 0.84270079294971486934122063508260926f32); assert_approx_eq!(1.0f64.erf(), 0.84270079294971486934122063508260926f64); + assert_eq!(f32::INFINITY.erf(), 1.0); + assert_eq!(f64::INFINITY.erf(), 1.0); assert_approx_eq!(1.0f32.erfc(), 0.15729920705028513065877936491739074f32); assert_approx_eq!(1.0f64.erfc(), 0.15729920705028513065877936491739074f64); + assert_eq!(f32::NEG_INFINITY.erfc(), 2.0); + assert_eq!(f64::NEG_INFINITY.erfc(), 2.0); + assert_eq!(f32::INFINITY.erfc(), 0.0); + assert_eq!(f64::INFINITY.erfc(), 0.0); } fn test_fast() { @@ -1418,7 +1485,6 @@ fn test_non_determinism() { } pub fn test_operations_f32(a: f32, b: f32) { test_operations_f!(a, b); - // FIXME: some are temporarily disabled as it breaks std tests. 
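The `fixed_atan2_cases!` macro above stamps the same set of assertions out once for `f32` and once for `f64`. The standalone sketch below shows the minimal form of that per-float-type macro pattern, reduced to two of the atan2 cases and using an illustrative macro name:

```
// Same-assertions-for-both-float-types pattern, reduced to two atan2 cases.
macro_rules! check_atan2_zero_cases {
    ($float_type:ident) => {{
        use std::$float_type::consts::PI;
        assert_eq!($float_type::atan2(0.0, -0.0), PI); // atan2(+0, −0) = π
        assert_eq!($float_type::atan2(-0.0, -0.0), -PI); // atan2(−0, −0) = −π
    }};
}

fn main() {
    check_atan2_zero_cases!(f32);
    check_atan2_zero_cases!(f64);
    println!("ok");
}
```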
ensure_nondet(|| a.powf(b)); ensure_nondet(|| a.powi(2)); ensure_nondet(|| a.log(b)); @@ -1427,35 +1493,34 @@ fn test_non_determinism() { ensure_nondet(|| f32::consts::E.ln()); ensure_nondet(|| 10f32.log10()); ensure_nondet(|| 8f32.log2()); - // ensure_nondet(|| 1f32.ln_1p()); - // ensure_nondet(|| 27.0f32.cbrt()); - // ensure_nondet(|| 3.0f32.hypot(4.0f32)); + ensure_nondet(|| 1f32.ln_1p()); + ensure_nondet(|| 27.0f32.cbrt()); + ensure_nondet(|| 3.0f32.hypot(4.0f32)); ensure_nondet(|| 1f32.sin()); ensure_nondet(|| 1f32.cos()); // On i686-pc-windows-msvc , these functions are implemented by calling the `f64` version, // which means the little rounding errors Miri introduces are discarded by the cast down to // `f32`. Just skip the test for them. - // if !cfg!(all(target_os = "windows", target_env = "msvc", target_arch = "x86")) { - // ensure_nondet(|| 1.0f32.tan()); - // ensure_nondet(|| 1.0f32.asin()); - // ensure_nondet(|| 5.0f32.acos()); - // ensure_nondet(|| 1.0f32.atan()); - // ensure_nondet(|| 1.0f32.atan2(2.0f32)); - // ensure_nondet(|| 1.0f32.sinh()); - // ensure_nondet(|| 1.0f32.cosh()); - // ensure_nondet(|| 1.0f32.tanh()); - // } - // ensure_nondet(|| 1.0f32.asinh()); - // ensure_nondet(|| 2.0f32.acosh()); - // ensure_nondet(|| 0.5f32.atanh()); - // ensure_nondet(|| 5.0f32.gamma()); - // ensure_nondet(|| 5.0f32.ln_gamma()); - // ensure_nondet(|| 5.0f32.erf()); - // ensure_nondet(|| 5.0f32.erfc()); + if !cfg!(all(target_os = "windows", target_env = "msvc", target_arch = "x86")) { + ensure_nondet(|| 1.0f32.tan()); + ensure_nondet(|| 1.0f32.asin()); + ensure_nondet(|| 5.0f32.acos()); + ensure_nondet(|| 1.0f32.atan()); + ensure_nondet(|| 1.0f32.atan2(2.0f32)); + ensure_nondet(|| 1.0f32.sinh()); + ensure_nondet(|| 1.0f32.cosh()); + ensure_nondet(|| 1.0f32.tanh()); + } + ensure_nondet(|| 1.0f32.asinh()); + ensure_nondet(|| 2.0f32.acosh()); + ensure_nondet(|| 0.5f32.atanh()); + ensure_nondet(|| 5.0f32.gamma()); + ensure_nondet(|| 5.0f32.ln_gamma()); + ensure_nondet(|| 5.0f32.erf()); + ensure_nondet(|| 5.0f32.erfc()); } pub fn test_operations_f64(a: f64, b: f64) { test_operations_f!(a, b); - // FIXME: some are temporarily disabled as it breaks std tests. 
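The `ensure_nondet` calls re-enabled above assert that, under Miri's random ULP error, repeatedly evaluating the same operation eventually produces more than one result. A rough standalone sketch of that property follows; `saw_nondeterminism` is an illustrative stand-in, not the helper defined in the test file:

```
// True if `op`, evaluated repeatedly, ever returns something different from
// its first result. Under Miri's error model the operations above should trip
// this; outside Miri, libm calls are deterministic and it stays false.
fn saw_nondeterminism<T: PartialEq>(mut op: impl FnMut() -> T, tries: usize) -> bool {
    let first = op();
    (1..tries).any(|_| op() != first)
}

fn main() {
    let nondet = saw_nondeterminism(|| 1.0f64.tan(), 64);
    println!("tan(1.0) non-deterministic here: {nondet}");
}
```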
ensure_nondet(|| a.powf(b)); ensure_nondet(|| a.powi(2)); ensure_nondet(|| a.log(b)); @@ -1464,26 +1529,26 @@ fn test_non_determinism() { ensure_nondet(|| 3f64.ln()); ensure_nondet(|| f64::consts::E.log10()); ensure_nondet(|| f64::consts::E.log2()); - // ensure_nondet(|| 1f64.ln_1p()); - // ensure_nondet(|| 27.0f64.cbrt()); - // ensure_nondet(|| 3.0f64.hypot(4.0f64)); + ensure_nondet(|| 1f64.ln_1p()); + ensure_nondet(|| 27.0f64.cbrt()); + ensure_nondet(|| 3.0f64.hypot(4.0f64)); ensure_nondet(|| 1f64.sin()); ensure_nondet(|| 1f64.cos()); - // ensure_nondet(|| 1.0f64.tan()); - // ensure_nondet(|| 1.0f64.asin()); - // ensure_nondet(|| 5.0f64.acos()); - // ensure_nondet(|| 1.0f64.atan()); - // ensure_nondet(|| 1.0f64.atan2(2.0f64)); - // ensure_nondet(|| 1.0f64.sinh()); - // ensure_nondet(|| 1.0f64.cosh()); - // ensure_nondet(|| 1.0f64.tanh()); - // ensure_nondet(|| 1.0f64.asinh()); - // ensure_nondet(|| 3.0f64.acosh()); - // ensure_nondet(|| 0.5f64.atanh()); - // ensure_nondet(|| 5.0f64.gamma()); - // ensure_nondet(|| 5.0f64.ln_gamma()); - // ensure_nondet(|| 5.0f64.erf()); - // ensure_nondet(|| 5.0f64.erfc()); + ensure_nondet(|| 1.0f64.tan()); + ensure_nondet(|| 1.0f64.asin()); + ensure_nondet(|| 5.0f64.acos()); + ensure_nondet(|| 1.0f64.atan()); + ensure_nondet(|| 1.0f64.atan2(2.0f64)); + ensure_nondet(|| 1.0f64.sinh()); + ensure_nondet(|| 1.0f64.cosh()); + ensure_nondet(|| 1.0f64.tanh()); + ensure_nondet(|| 1.0f64.asinh()); + ensure_nondet(|| 3.0f64.acosh()); + ensure_nondet(|| 0.5f64.atanh()); + ensure_nondet(|| 5.0f64.gamma()); + ensure_nondet(|| 5.0f64.ln_gamma()); + ensure_nondet(|| 5.0f64.erf()); + ensure_nondet(|| 5.0f64.erfc()); } pub fn test_operations_f128(a: f128, b: f128) { test_operations_f!(a, b); diff --git a/tests/assembly-llvm/breakpoint.rs b/tests/assembly-llvm/breakpoint.rs index e0cc2d1eebb..d119b68e899 100644 --- a/tests/assembly-llvm/breakpoint.rs +++ b/tests/assembly-llvm/breakpoint.rs @@ -9,6 +9,7 @@ // CHECK-LABEL: use_bp // aarch64: brk #0xf000 // x86_64: int3 +#[inline(never)] pub fn use_bp() { core::arch::breakpoint(); } diff --git a/tests/assembly-llvm/simd/reduce-fadd-unordered.rs b/tests/assembly-llvm/simd/reduce-fadd-unordered.rs index e872826f6ef..fdd03639da0 100644 --- a/tests/assembly-llvm/simd/reduce-fadd-unordered.rs +++ b/tests/assembly-llvm/simd/reduce-fadd-unordered.rs @@ -16,6 +16,7 @@ use std::simd::*; // It would emit about an extra fadd, depending on the architecture. 
 // CHECK-LABEL: reduce_fadd_negative_zero
+#[inline(never)]
 pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 {
     // x86_64: addps
     // x86_64-NEXT: movshdup
diff --git a/tests/auxiliary/minicore.rs b/tests/auxiliary/minicore.rs
index da880100a10..4f4c653cb46 100644
--- a/tests/auxiliary/minicore.rs
+++ b/tests/auxiliary/minicore.rs
@@ -179,7 +179,14 @@ impl Add<isize> for isize {
 
 #[lang = "sync"]
 trait Sync {}
-impl Sync for u8 {}
+impl_marker_trait!(
+    Sync => [
+        char, bool,
+        isize, i8, i16, i32, i64, i128,
+        usize, u8, u16, u32, u64, u128,
+        f16, f32, f64, f128,
+    ]
+);
 
 #[lang = "drop_in_place"]
 fn drop_in_place<T>(_: *mut T) {}
diff --git a/tests/codegen-llvm/amdgpu-addrspacecast.rs b/tests/codegen-llvm/amdgpu-addrspacecast.rs
index 7fe630a7efa..829133de00d 100644
--- a/tests/codegen-llvm/amdgpu-addrspacecast.rs
+++ b/tests/codegen-llvm/amdgpu-addrspacecast.rs
@@ -16,3 +16,12 @@ pub fn ref_of_local(f: fn(&i32)) {
     let i = 0;
     f(&i);
 }
+
+// CHECK-LABEL: @ref_of_global
+// CHECK: addrspacecast (ptr addrspace(1) @I to ptr)
+#[no_mangle]
+pub fn ref_of_global(f: fn(&i32)) {
+    #[no_mangle]
+    static I: i32 = 0;
+    f(&I);
+}
diff --git a/tests/codegen-llvm/cross-crate-inlining/auxiliary/leaf.rs b/tests/codegen-llvm/cross-crate-inlining/auxiliary/leaf.rs
index d059a3d0a73..7b5679c3f4d 100644
--- a/tests/codegen-llvm/cross-crate-inlining/auxiliary/leaf.rs
+++ b/tests/codegen-llvm/cross-crate-inlining/auxiliary/leaf.rs
@@ -18,3 +18,8 @@ pub fn stem_fn() -> String {
 fn inner() -> String {
     String::from("test")
 }
+
+// This function's optimized MIR contains a call, but it is to an intrinsic.
+pub fn leaf_with_intrinsic(a: &[u64; 2], b: &[u64; 2]) -> bool {
+    a == b
+}
diff --git a/tests/codegen-llvm/cross-crate-inlining/leaf-inlining.rs b/tests/codegen-llvm/cross-crate-inlining/leaf-inlining.rs
index 37132312ca9..5e7912791ad 100644
--- a/tests/codegen-llvm/cross-crate-inlining/leaf-inlining.rs
+++ b/tests/codegen-llvm/cross-crate-inlining/leaf-inlining.rs
@@ -18,3 +18,10 @@ pub fn stem_outer() -> String {
     // CHECK: call {{.*}}stem_fn
     leaf::stem_fn()
 }
+
+// Check that we inline functions that call intrinsics
+#[no_mangle]
+pub fn leaf_with_intrinsic_outer(a: &[u64; 2], b: &[u64; 2]) -> bool {
+    // CHECK-NOT: call {{.*}}leaf_with_intrinsic
+    leaf::leaf_with_intrinsic(a, b)
+}
diff --git a/tests/codegen-llvm/default-visibility.rs b/tests/codegen-llvm/default-visibility.rs
index 88ff9fee254..28238e5ef12 100644
--- a/tests/codegen-llvm/default-visibility.rs
+++ b/tests/codegen-llvm/default-visibility.rs
@@ -32,6 +32,7 @@ pub static tested_symbol: [u8; 6] = *b"foobar";
 
 // INTERPOSABLE: @{{.*}}default_visibility{{.*}}tested_symbol{{.*}} = constant
 // DEFAULT: @{{.*}}default_visibility{{.*}}tested_symbol{{.*}} = constant
+#[inline(never)]
 pub fn do_memcmp(left: &[u8], right: &[u8]) -> i32 {
     left.cmp(right) as i32
 }
diff --git a/tests/codegen-llvm/gpu-kernel-abi.rs b/tests/codegen-llvm/gpu-kernel-abi.rs
index 8ac376d9338..d5a357ef655 100644
--- a/tests/codegen-llvm/gpu-kernel-abi.rs
+++ b/tests/codegen-llvm/gpu-kernel-abi.rs
@@ -1,7 +1,9 @@
 // Checks that the gpu-kernel calling convention correctly translates to LLVM calling conventions.
 
 //@ add-core-stubs
-//@ revisions: nvptx
+//@ revisions: amdgpu nvptx
+//@ [amdgpu] compile-flags: --crate-type=rlib --target=amdgcn-amd-amdhsa -Ctarget-cpu=gfx900
+//@ [amdgpu] needs-llvm-components: amdgpu
 //@ [nvptx] compile-flags: --crate-type=rlib --target=nvptx64-nvidia-cuda
 //@ [nvptx] needs-llvm-components: nvptx
 #![feature(no_core, lang_items, abi_gpu_kernel)]
@@ -10,6 +12,7 @@
 extern crate minicore;
 use minicore::*;
 
+// amdgpu: define amdgpu_kernel void @fun(i32
 // nvptx: define ptx_kernel void @fun(i32
 #[no_mangle]
 pub extern "gpu-kernel" fn fun(_: i32) {}
diff --git a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff
index 5d8aaedae37..a2f10be31a9 100644
--- a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff
+++ b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff
@@ -8,25 +8,23 @@
       let _5: ();
       let mut _6: i32;
       scope 1 {
--         debug x => _1;
-+         debug x => _6;
+          debug x => _1;
           let _2: i32;
           scope 2 {
 -             debug y => _2;
-+             debug y => _6;
++             debug y => _1;
               let _3: i32;
               scope 3 {
 -                 debug z => _3;
-+                 debug z => _6;
++                 debug z => _1;
               }
           }
       }
 
       bb0: {
 -         StorageLive(_1);
--         _1 = val() -> [return: bb1, unwind unreachable];
 +         nop;
-+         _6 = val() -> [return: bb1, unwind unreachable];
+          _1 = val() -> [return: bb1, unwind unreachable];
       }
 
       bb1: {
@@ -49,9 +47,10 @@
           StorageLive(_5);
 -         StorageLive(_6);
 -         _6 = copy _1;
+-         _5 = std::mem::drop::<i32>(move _6) -> [return: bb2, unwind unreachable];
 +         nop;
 +         nop;
-          _5 = std::mem::drop::<i32>(move _6) -> [return: bb2, unwind unreachable];
++         _5 = std::mem::drop::<i32>(move _1) -> [return: bb2, unwind unreachable];
       }
 
       bb2: {
diff --git a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff
index 05c9bcc1d73..a08488615b1 100644
--- a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff
+++ b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff
@@ -8,25 +8,23 @@
       let _5: ();
       let mut _6: i32;
       scope 1 {
--         debug x => _1;
-+         debug x => _6;
+          debug x => _1;
           let _2: i32;
           scope 2 {
 -             debug y => _2;
-+             debug y => _6;
++             debug y => _1;
               let _3: i32;
               scope 3 {
 -                 debug z => _3;
-+                 debug z => _6;
++                 debug z => _1;
               }
           }
       }
 
       bb0: {
 -         StorageLive(_1);
--         _1 = val() -> [return: bb1, unwind continue];
 +         nop;
-+         _6 = val() -> [return: bb1, unwind continue];
+          _1 = val() -> [return: bb1, unwind continue];
       }
 
       bb1: {
@@ -49,9 +47,10 @@
           StorageLive(_5);
 -         StorageLive(_6);
 -         _6 = copy _1;
+-         _5 = std::mem::drop::<i32>(move _6) -> [return: bb2, unwind continue];
 +         nop;
 +         nop;
-          _5 = std::mem::drop::<i32>(move _6) -> [return: bb2, unwind continue];
++         _5 = std::mem::drop::<i32>(move _1) -> [return: bb2, unwind continue];
       }
 
       bb2: {
diff --git a/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-abort.mir b/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-abort.mir
index eb4209731c6..15061da8120 100644
--- a/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-abort.mir
+++ b/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-abort.mir
@@ -7,16 +7,16 @@ fn f(_1: usize) -> usize {
     let mut _3: usize;
     let mut _4: usize;
     scope 1 {
-        debug b => _3;
+        debug b => _2;
     }
 
     bb0: {
         nop;
-        _3 = copy _1;
+        _2 = copy _1;
         _1 = const 5_usize;
         nop;
         nop;
-        _1 = move _3;
+        _1 = move _2;
         nop;
         nop;
         nop;
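The dead_stores_79191 and dead_stores_better expectation files in this patch (this one and the three that follow) all describe the same shape of function. A rough surface-Rust sketch of that pattern, reconstructed from the MIR above rather than copied from the test sources, looks like this:

// Hypothetical reconstruction, not the actual test source: `b` saves the
// argument, the argument is clobbered with a dead store, then restored,
// so DestinationPropagation can merge `b` with another local.
pub fn f(mut a: usize) -> usize {
    let b = a; // corresponds to `_2 = copy _1` in the post-pass MIR
    a = 5;     // the dead store: `_1 = const 5_usize`
    a = b;     // `_1 = move _2`
    a
}

fn main() {
    assert_eq!(f(7), 7); // the intermediate store of 5 must never be observable
}

In the updated expectations the pass simply settles on a different local for `b` (`_2` instead of `_3`); the observable behaviour is unchanged.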
diff --git a/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-unwind.mir b/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-unwind.mir
index fe9a7376a58..ddfe4dc5b3e 100644
--- a/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-unwind.mir
+++ b/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-unwind.mir
@@ -7,16 +7,16 @@ fn f(_1: usize) -> usize {
     let mut _3: usize;
     let mut _4: usize;
     scope 1 {
-        debug b => _3;
+        debug b => _2;
     }
 
     bb0: {
         nop;
-        _3 = copy _1;
+        _2 = copy _1;
         _1 = const 5_usize;
         nop;
         nop;
-        _1 = move _3;
+        _1 = move _2;
         nop;
         nop;
         nop;
diff --git a/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-abort.mir b/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-abort.mir
index eb4209731c6..15061da8120 100644
--- a/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-abort.mir
+++ b/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-abort.mir
@@ -7,16 +7,16 @@ fn f(_1: usize) -> usize {
     let mut _3: usize;
     let mut _4: usize;
    scope 1 {
-        debug b => _3;
+        debug b => _2;
     }
 
     bb0: {
         nop;
-        _3 = copy _1;
+        _2 = copy _1;
         _1 = const 5_usize;
         nop;
         nop;
-        _1 = move _3;
+        _1 = move _2;
         nop;
         nop;
         nop;
diff --git a/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-unwind.mir b/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-unwind.mir
index fe9a7376a58..ddfe4dc5b3e 100644
--- a/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-unwind.mir
+++ b/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-unwind.mir
@@ -7,16 +7,16 @@ fn f(_1: usize) -> usize {
     let mut _3: usize;
     let mut _4: usize;
     scope 1 {
-        debug b => _3;
+        debug b => _2;
     }
 
     bb0: {
         nop;
-        _3 = copy _1;
+        _2 = copy _1;
         _1 = const 5_usize;
         nop;
         nop;
-        _1 = move _3;
+        _1 = move _2;
         nop;
         nop;
         nop;
diff --git a/tests/mir-opt/gvn_overlapping.fields.GVN.diff b/tests/mir-opt/gvn_overlapping.fields.GVN.diff
new file mode 100644
index 00000000000..0548f4e42e4
--- /dev/null
+++ b/tests/mir-opt/gvn_overlapping.fields.GVN.diff
@@ -0,0 +1,14 @@
+- // MIR for `fields` before GVN
++ // MIR for `fields` after GVN
+
+  fn fields(_1: (Adt, Adt)) -> () {
+      let mut _0: ();
+      let mut _2: u32;
+
+      bb0: {
+          _2 = copy (((_1.0: Adt) as variant#1).0: u32);
+          (_1.1: Adt) = Adt::Some(copy _2);
+          return;
+      }
+  }
+
diff --git a/tests/mir-opt/gvn_overlapping.rs b/tests/mir-opt/gvn_overlapping.rs
index 99113445e68..f148a843561 100644
--- a/tests/mir-opt/gvn_overlapping.rs
+++ b/tests/mir-opt/gvn_overlapping.rs
@@ -2,11 +2,10 @@
 
 #![feature(custom_mir, core_intrinsics)]
 
-// Check that we do not create overlapping assignments.
-
 use std::intrinsics::mir::*;
 
 // EMIT_MIR gvn_overlapping.overlapping.GVN.diff
+/// Check that we do not create overlapping assignments.
 #[custom_mir(dialect = "runtime")]
 fn overlapping(_17: Adt) {
     // CHECK-LABEL: fn overlapping(
@@ -26,6 +25,45 @@ fn overlapping(_17: Adt) {
     }
 }
 
+// EMIT_MIR gvn_overlapping.stable_projection.GVN.diff
+/// Check that we allow dereferences in the RHS if the LHS is a stable projection.
+#[custom_mir(dialect = "runtime")]
+fn stable_projection(_1: (Adt,)) {
+    // CHECK-LABEL: fn stable_projection(
+    // CHECK: let mut _2: *mut Adt;
+    // CHECK: let mut _4: &Adt;
+    // CHECK: (_1.0: Adt) = copy (*_4);
+    mir! {
+        let _2: *mut Adt;
+        let _3: u32;
+        let _4: &Adt;
+        {
+            _2 = core::ptr::addr_of_mut!(_1.0);
+            _4 = &(*_2);
+            _3 = Field(Variant((*_4), 1), 0);
+            _1.0 = Adt::Some(_3);
+            Return()
+        }
+    }
+}
+
+// EMIT_MIR gvn_overlapping.fields.GVN.diff
+/// Check that we do not create assignments between different fields of the same local.
+#[custom_mir(dialect = "runtime")]
+fn fields(_1: (Adt, Adt)) {
+    // CHECK-LABEL: fn fields(
+    // CHECK: _2 = copy (((_1.0: Adt) as variant#1).0: u32);
+    // CHECK-NEXT: (_1.1: Adt) = Adt::Some(copy _2);
+    mir! {
+        let _2: u32;
+        {
+            _2 = Field(Variant(_1.0, 1), 0);
+            _1.1 = Adt::Some(_2);
+            Return()
+        }
+    }
+}
+
 fn main() {
     overlapping(Adt::Some(0));
 }
diff --git a/tests/mir-opt/gvn_overlapping.stable_projection.GVN.diff b/tests/mir-opt/gvn_overlapping.stable_projection.GVN.diff
new file mode 100644
index 00000000000..08835456591
--- /dev/null
+++ b/tests/mir-opt/gvn_overlapping.stable_projection.GVN.diff
@@ -0,0 +1,19 @@
+- // MIR for `stable_projection` before GVN
++ // MIR for `stable_projection` after GVN
+
+  fn stable_projection(_1: (Adt,)) -> () {
+      let mut _0: ();
+      let mut _2: *mut Adt;
+      let mut _3: u32;
+      let mut _4: &Adt;
+
+      bb0: {
+          _2 = &raw mut (_1.0: Adt);
+          _4 = &(*_2);
+          _3 = copy (((*_4) as variant#1).0: u32);
+-         (_1.0: Adt) = Adt::Some(copy _3);
++         (_1.0: Adt) = copy (*_4);
+          return;
+      }
+  }
+
diff --git a/tests/mir-opt/nrvo_miscompile_111005.rs b/tests/mir-opt/nrvo_miscompile_111005.rs
index 03008fa8191..131f7b8f6f9 100644
--- a/tests/mir-opt/nrvo_miscompile_111005.rs
+++ b/tests/mir-opt/nrvo_miscompile_111005.rs
@@ -1,4 +1,3 @@
-// skip-filecheck
 // This is a miscompilation, #111005 to track
 
 //@ test-mir-pass: RenameReturnPlace
@@ -10,6 +9,11 @@ use core::intrinsics::mir::*;
 // EMIT_MIR nrvo_miscompile_111005.wrong.RenameReturnPlace.diff
 #[custom_mir(dialect = "runtime", phase = "initial")]
 pub fn wrong(arg: char) -> char {
+    // CHECK-LABEL: fn wrong(
+    // CHECK: _0 = copy _1;
+    // FIXME: This is wrong:
+    // CHECK-NEXT: _0 = const 'b';
+    // CHECK-NEXT: return;
     mir! {
         {
             let temp = arg;
diff --git a/tests/mir-opt/pre-codegen/clone_as_copy.enum_clone_as_copy.PreCodegen.after.mir b/tests/mir-opt/pre-codegen/clone_as_copy.enum_clone_as_copy.PreCodegen.after.mir
index 9f88e1961ec..e67f362ee04 100644
--- a/tests/mir-opt/pre-codegen/clone_as_copy.enum_clone_as_copy.PreCodegen.after.mir
+++ b/tests/mir-opt/pre-codegen/clone_as_copy.enum_clone_as_copy.PreCodegen.after.mir
@@ -6,8 +6,8 @@ fn enum_clone_as_copy(_1: &Enum1) -> Enum1 {
     scope 1 (inlined <Enum1 as Clone>::clone) {
         debug self => _1;
         let mut _2: isize;
-        let mut _3: &AllCopy;
-        let mut _4: &NestCopy;
+        let _3: &AllCopy;
+        let _4: &NestCopy;
         scope 2 {
             debug __self_0 => _3;
             scope 6 (inlined <AllCopy as Clone>::clone) {
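For context on the `clone_as_copy` expectation directly above, here is a minimal sketch of the kind of types involved; the definitions below are assumed for illustration and are not copied from the test. Every variant of `Enum1` carries only `Copy` data, so the derived `Clone` collapses to a plain copy once the inlined calls are optimized away:

// Assumed type shapes, inferred from the MIR above; the field types are guesses.
#[derive(Clone, Copy)]
struct AllCopy {
    a: u32,
    b: i64,
}

#[derive(Clone, Copy)]
struct NestCopy {
    inner: AllCopy,
}

#[derive(Clone)]
enum Enum1 {
    A(AllCopy),
    B(NestCopy),
}

fn enum_clone_as_copy(e: &Enum1) -> Enum1 {
    e.clone() // after inlining, this is just a bitwise copy of the enum value
}

fn main() {
    let e = Enum1::B(NestCopy { inner: AllCopy { a: 1, b: 2 } });
    let _copy = enum_clone_as_copy(&e);
}

The change in the expected MIR above is only that `_3` and `_4` are no longer declared `mut`; it does not affect this picture.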
