about summary refs log tree commit diff
path: root/compiler/rustc_mir_transform/src
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_mir_transform/src')
-rw-r--r--compiler/rustc_mir_transform/src/add_retag.rs2
-rw-r--r--compiler/rustc_mir_transform/src/add_subtyping_projections.rs4
-rw-r--r--compiler/rustc_mir_transform/src/check_call_recursion.rs6
-rw-r--r--compiler/rustc_mir_transform/src/check_enums.rs15
-rw-r--r--compiler/rustc_mir_transform/src/check_inline.rs8
-rw-r--r--compiler/rustc_mir_transform/src/check_inline_always_target_features.rs88
-rw-r--r--compiler/rustc_mir_transform/src/check_packed_ref.rs2
-rw-r--r--compiler/rustc_mir_transform/src/coroutine.rs70
-rw-r--r--compiler/rustc_mir_transform/src/coroutine/by_move_body.rs7
-rw-r--r--compiler/rustc_mir_transform/src/coroutine/drop.rs20
-rw-r--r--compiler/rustc_mir_transform/src/coverage/counters.rs1
-rw-r--r--compiler/rustc_mir_transform/src/coverage/counters/node_flow.rs3
-rw-r--r--compiler/rustc_mir_transform/src/coverage/counters/union_find.rs96
-rw-r--r--compiler/rustc_mir_transform/src/coverage/counters/union_find/tests.rs32
-rw-r--r--compiler/rustc_mir_transform/src/coverage/expansion.rs127
-rw-r--r--compiler/rustc_mir_transform/src/coverage/hir_info.rs128
-rw-r--r--compiler/rustc_mir_transform/src/coverage/mappings.rs310
-rw-r--r--compiler/rustc_mir_transform/src/coverage/mod.rs295
-rw-r--r--compiler/rustc_mir_transform/src/coverage/query.rs48
-rw-r--r--compiler/rustc_mir_transform/src/coverage/spans.rs195
-rw-r--r--compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs40
-rw-r--r--compiler/rustc_mir_transform/src/coverage/unexpand.rs48
-rw-r--r--compiler/rustc_mir_transform/src/cross_crate_inline.rs13
-rw-r--r--compiler/rustc_mir_transform/src/ctfe_limit.rs2
-rw-r--r--compiler/rustc_mir_transform/src/dataflow_const_prop.rs38
-rw-r--r--compiler/rustc_mir_transform/src/dest_prop.rs819
-rw-r--r--compiler/rustc_mir_transform/src/elaborate_drop.rs58
-rw-r--r--compiler/rustc_mir_transform/src/elaborate_drops.rs4
-rw-r--r--compiler/rustc_mir_transform/src/errors.rs49
-rw-r--r--compiler/rustc_mir_transform/src/gvn.rs930
-rw-r--r--compiler/rustc_mir_transform/src/impossible_predicates.rs25
-rw-r--r--compiler/rustc_mir_transform/src/inline.rs2
-rw-r--r--compiler/rustc_mir_transform/src/inline/cycle.rs46
-rw-r--r--compiler/rustc_mir_transform/src/instsimplify.rs31
-rw-r--r--compiler/rustc_mir_transform/src/known_panics_lint.rs15
-rw-r--r--compiler/rustc_mir_transform/src/lib.rs9
-rw-r--r--compiler/rustc_mir_transform/src/lint.rs44
-rw-r--r--compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs14
-rw-r--r--compiler/rustc_mir_transform/src/nrvo.rs234
-rw-r--r--compiler/rustc_mir_transform/src/pass_manager.rs85
-rw-r--r--compiler/rustc_mir_transform/src/promote_consts.rs26
-rw-r--r--compiler/rustc_mir_transform/src/ref_prop.rs39
-rw-r--r--compiler/rustc_mir_transform/src/remove_noop_landing_pads.rs15
-rw-r--r--compiler/rustc_mir_transform/src/remove_unneeded_drops.rs31
-rw-r--r--compiler/rustc_mir_transform/src/shim.rs20
-rw-r--r--compiler/rustc_mir_transform/src/simplify.rs19
-rw-r--r--compiler/rustc_mir_transform/src/sroa.rs40
-rw-r--r--compiler/rustc_mir_transform/src/ssa.rs4
-rw-r--r--compiler/rustc_mir_transform/src/unreachable_prop.rs2
-rw-r--r--compiler/rustc_mir_transform/src/validate.rs54
50 files changed, 1813 insertions, 2400 deletions
diff --git a/compiler/rustc_mir_transform/src/add_retag.rs b/compiler/rustc_mir_transform/src/add_retag.rs
index 3c29d4624b7..fc08c1df870 100644
--- a/compiler/rustc_mir_transform/src/add_retag.rs
+++ b/compiler/rustc_mir_transform/src/add_retag.rs
@@ -4,7 +4,6 @@
 //! of MIR building, and only after this pass we think of the program has having the
 //! normal MIR semantics.
 
-use rustc_hir::LangItem;
 use rustc_middle::mir::*;
 use rustc_middle::ty::{self, Ty, TyCtxt};
 
@@ -28,7 +27,6 @@ fn may_contain_reference<'tcx>(ty: Ty<'tcx>, depth: u32, tcx: TyCtxt<'tcx>) -> b
         // References and Boxes (`noalias` sources)
         ty::Ref(..) => true,
         ty::Adt(..) if ty.is_box() => true,
-        ty::Adt(adt, _) if tcx.is_lang_item(adt.did(), LangItem::PtrUnique) => true,
         // Compound types: recurse
         ty::Array(ty, _) | ty::Slice(ty) => {
             // This does not branch so we keep the depth the same.
diff --git a/compiler/rustc_mir_transform/src/add_subtyping_projections.rs b/compiler/rustc_mir_transform/src/add_subtyping_projections.rs
index 92ee80eaa35..be4f84d64d0 100644
--- a/compiler/rustc_mir_transform/src/add_subtyping_projections.rs
+++ b/compiler/rustc_mir_transform/src/add_subtyping_projections.rs
@@ -32,8 +32,8 @@ impl<'a, 'tcx> MutVisitor<'tcx> for SubTypeChecker<'a, 'tcx> {
         let mut rval_ty = rvalue.ty(self.local_decls, self.tcx);
         // Not erasing this causes `Free Regions` errors in validator,
         // when rval is `ReStatic`.
-        rval_ty = self.tcx.erase_regions(rval_ty);
-        place_ty = self.tcx.erase_regions(place_ty);
+        rval_ty = self.tcx.erase_and_anonymize_regions(rval_ty);
+        place_ty = self.tcx.erase_and_anonymize_regions(place_ty);
         if place_ty != rval_ty {
             let temp = self
                 .patcher
diff --git a/compiler/rustc_mir_transform/src/check_call_recursion.rs b/compiler/rustc_mir_transform/src/check_call_recursion.rs
index cace4cd6bba..a9acb1da5a3 100644
--- a/compiler/rustc_mir_transform/src/check_call_recursion.rs
+++ b/compiler/rustc_mir_transform/src/check_call_recursion.rs
@@ -21,7 +21,7 @@ impl<'tcx> MirLint<'tcx> for CheckCallRecursion {
 
         if let DefKind::Fn | DefKind::AssocFn = tcx.def_kind(def_id) {
             // If this is trait/impl method, extract the trait's args.
-            let trait_args = match tcx.trait_of_item(def_id.to_def_id()) {
+            let trait_args = match tcx.trait_of_assoc(def_id.to_def_id()) {
                 Some(trait_def_id) => {
                     let trait_args_count = tcx.generics_of(trait_def_id).count();
                     &GenericArgs::identity_for_item(tcx, def_id)[..trait_args_count]
@@ -43,8 +43,8 @@ impl<'tcx> MirLint<'tcx> for CheckDropRecursion {
 
         // First check if `body` is an `fn drop()` of `Drop`
         if let DefKind::AssocFn = tcx.def_kind(def_id)
-        && let Some(trait_ref) =
-            tcx.impl_of_method(def_id.to_def_id()).and_then(|def_id| tcx.impl_trait_ref(def_id))
+        && let Some(impl_id) = tcx.trait_impl_of_assoc(def_id.to_def_id())
+        && let trait_ref = tcx.impl_trait_ref(impl_id).unwrap()
         && tcx.is_lang_item(trait_ref.instantiate_identity().def_id, LangItem::Drop)
         // avoid erroneous `Drop` impls from causing ICEs below
         && let sig = tcx.fn_sig(def_id).instantiate_identity()
diff --git a/compiler/rustc_mir_transform/src/check_enums.rs b/compiler/rustc_mir_transform/src/check_enums.rs
index 33a87cb9873..12447dc7cbb 100644
--- a/compiler/rustc_mir_transform/src/check_enums.rs
+++ b/compiler/rustc_mir_transform/src/check_enums.rs
@@ -48,6 +48,21 @@ impl<'tcx> crate::MirPass<'tcx> for CheckEnums {
                     let new_block = split_block(basic_blocks, location);
 
                     match check {
+                        EnumCheckType::Direct { op_size, .. }
+                        | EnumCheckType::WithNiche { op_size, .. }
+                            if op_size.bytes() == 0 =>
+                        {
+                            // It is never valid to use a ZST as a discriminant for an inhabited enum, but that will
+                            // have been caught by the type checker. Do nothing but ensure that a bug has been signaled.
+                            tcx.dcx().span_delayed_bug(
+                                source_info.span,
+                                "cannot build enum discriminant from zero-sized type",
+                            );
+                            basic_blocks[block].terminator = Some(Terminator {
+                                source_info,
+                                kind: TerminatorKind::Goto { target: new_block },
+                            });
+                        }
                         EnumCheckType::Direct { source_op, discr, op_size, valid_discrs } => {
                             insert_direct_enum_check(
                                 tcx,
diff --git a/compiler/rustc_mir_transform/src/check_inline.rs b/compiler/rustc_mir_transform/src/check_inline.rs
index 14d9532894f..8d28cb3ca00 100644
--- a/compiler/rustc_mir_transform/src/check_inline.rs
+++ b/compiler/rustc_mir_transform/src/check_inline.rs
@@ -1,7 +1,7 @@
 //! Check that a body annotated with `#[rustc_force_inline]` will not fail to inline based on its
 //! definition alone (irrespective of any specific caller).
 
-use rustc_attr_data_structures::InlineAttr;
+use rustc_hir::attrs::InlineAttr;
 use rustc_hir::def_id::DefId;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::mir::{Body, TerminatorKind};
@@ -45,12 +45,6 @@ pub(super) fn is_inline_valid_on_fn<'tcx>(
         return Err("#[rustc_no_mir_inline]");
     }
 
-    // FIXME(#127234): Coverage instrumentation currently doesn't handle inlined
-    // MIR correctly when Modified Condition/Decision Coverage is enabled.
-    if tcx.sess.instrument_coverage_mcdc() {
-        return Err("incompatible with MC/DC coverage");
-    }
-
     let ty = tcx.type_of(def_id);
     if match ty.instantiate_identity().kind() {
         ty::FnDef(..) => tcx.fn_sig(def_id).instantiate_identity().c_variadic(),
diff --git a/compiler/rustc_mir_transform/src/check_inline_always_target_features.rs b/compiler/rustc_mir_transform/src/check_inline_always_target_features.rs
new file mode 100644
index 00000000000..abad28f0a8f
--- /dev/null
+++ b/compiler/rustc_mir_transform/src/check_inline_always_target_features.rs
@@ -0,0 +1,88 @@
+use rustc_hir::attrs::InlineAttr;
+use rustc_middle::middle::codegen_fn_attrs::{TargetFeature, TargetFeatureKind};
+use rustc_middle::mir::{Body, TerminatorKind};
+use rustc_middle::ty::{self, TyCtxt};
+
+use crate::pass_manager::MirLint;
+
+pub(super) struct CheckInlineAlwaysTargetFeature;
+
+impl<'tcx> MirLint<'tcx> for CheckInlineAlwaysTargetFeature {
+    fn run_lint(&self, tcx: TyCtxt<'tcx>, body: &Body<'tcx>) {
+        check_inline_always_target_features(tcx, body)
+    }
+}
+
+/// `#[target_feature]`-annotated functions can be marked `#[inline]` and will only be inlined if
+/// the target features match (as well as all of the other inlining heuristics). `#[inline(always)]`
+/// will always inline regardless of matching target features, which can result in errors from LLVM.
+/// However, it is desirable to be able to always annotate certain functions (e.g. SIMD intrinsics)
+/// as `#[inline(always)]` but check the target features match in Rust to avoid the LLVM errors.
+///
+/// We check the caller and callee target features to ensure that this can
+/// be done or emit a lint.
+#[inline]
+fn check_inline_always_target_features<'tcx>(tcx: TyCtxt<'tcx>, body: &Body<'tcx>) {
+    let caller_def_id = body.source.def_id().expect_local();
+    if !tcx.def_kind(caller_def_id).has_codegen_attrs() {
+        return;
+    }
+
+    let caller_codegen_fn_attrs = tcx.codegen_fn_attrs(caller_def_id);
+
+    for bb in body.basic_blocks.iter() {
+        let terminator = bb.terminator();
+        match &terminator.kind {
+            TerminatorKind::Call { func, .. } | TerminatorKind::TailCall { func, .. } => {
+                let fn_ty = func.ty(body, tcx);
+                let ty::FnDef(callee_def_id, _) = *fn_ty.kind() else {
+                    continue;
+                };
+
+                if !tcx.def_kind(callee_def_id).has_codegen_attrs() {
+                    continue;
+                }
+                let callee_codegen_fn_attrs = tcx.codegen_fn_attrs(callee_def_id);
+                if callee_codegen_fn_attrs.inline != InlineAttr::Always
+                    || callee_codegen_fn_attrs.target_features.is_empty()
+                {
+                    continue;
+                }
+
+                // Scan the users defined target features and ensure they
+                // match the caller.
+                if tcx.is_target_feature_call_safe(
+                    &callee_codegen_fn_attrs.target_features,
+                    &caller_codegen_fn_attrs
+                        .target_features
+                        .iter()
+                        .cloned()
+                        .chain(tcx.sess.target_features.iter().map(|feat| TargetFeature {
+                            name: *feat,
+                            kind: TargetFeatureKind::Implied,
+                        }))
+                        .collect::<Vec<_>>(),
+                ) {
+                    continue;
+                }
+
+                let callee_only: Vec<_> = callee_codegen_fn_attrs
+                    .target_features
+                    .iter()
+                    .filter(|it| !caller_codegen_fn_attrs.target_features.contains(it))
+                    .filter(|it| !matches!(it.kind, TargetFeatureKind::Implied))
+                    .map(|it| it.name.as_str())
+                    .collect();
+
+                crate::errors::emit_inline_always_target_feature_diagnostic(
+                    tcx,
+                    terminator.source_info.span,
+                    callee_def_id,
+                    caller_def_id.into(),
+                    &callee_only,
+                );
+            }
+            _ => (),
+        }
+    }
+}
diff --git a/compiler/rustc_mir_transform/src/check_packed_ref.rs b/compiler/rustc_mir_transform/src/check_packed_ref.rs
index e9b85ba6e9d..100104e9de0 100644
--- a/compiler/rustc_mir_transform/src/check_packed_ref.rs
+++ b/compiler/rustc_mir_transform/src/check_packed_ref.rs
@@ -40,7 +40,7 @@ impl<'tcx> Visitor<'tcx> for PackedRefChecker<'_, 'tcx> {
         if context.is_borrow() && util::is_disaligned(self.tcx, self.body, self.typing_env, *place)
         {
             let def_id = self.body.source.instance.def_id();
-            if let Some(impl_def_id) = self.tcx.impl_of_method(def_id)
+            if let Some(impl_def_id) = self.tcx.trait_impl_of_assoc(def_id)
                 && self.tcx.is_builtin_derived(impl_def_id)
             {
                 // If we ever reach here it means that the generated derive
diff --git a/compiler/rustc_mir_transform/src/coroutine.rs b/compiler/rustc_mir_transform/src/coroutine.rs
index 761d5461a99..c1cd2788348 100644
--- a/compiler/rustc_mir_transform/src/coroutine.rs
+++ b/compiler/rustc_mir_transform/src/coroutine.rs
@@ -1294,7 +1294,9 @@ fn create_coroutine_resume_function<'tcx>(
 
     pm::run_passes_no_validate(tcx, body, &[&abort_unwinding_calls::AbortUnwindingCalls], None);
 
-    dump_mir(tcx, false, "coroutine_resume", &0, body, |_, _| Ok(()));
+    if let Some(dumper) = MirDumper::new(tcx, "coroutine_resume", body) {
+        dumper.dump_mir(body);
+    }
 }
 
 /// An operation that can be performed on a coroutine.
@@ -1338,14 +1340,13 @@ fn create_cases<'tcx>(
                     }
                 }
 
-                if operation == Operation::Resume {
+                if operation == Operation::Resume && point.resume_arg != CTX_ARG.into() {
                     // Move the resume argument to the destination place of the `Yield` terminator
-                    let resume_arg = CTX_ARG;
                     statements.push(Statement::new(
                         source_info,
                         StatementKind::Assign(Box::new((
                             point.resume_arg,
-                            Rvalue::Use(Operand::Move(resume_arg.into())),
+                            Rvalue::Use(Operand::Move(CTX_ARG.into())),
                         ))),
                     ));
                 }
@@ -1437,7 +1438,10 @@ fn check_field_tys_sized<'tcx>(
 }
 
 impl<'tcx> crate::MirPass<'tcx> for StateTransform {
+    #[instrument(level = "debug", skip(self, tcx, body), ret)]
     fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
+        debug!(def_id = ?body.source.def_id());
+
         let Some(old_yield_ty) = body.yield_ty() else {
             // This only applies to coroutines
             return;
@@ -1446,7 +1450,9 @@ impl<'tcx> crate::MirPass<'tcx> for StateTransform {
 
         assert!(body.coroutine_drop().is_none() && body.coroutine_drop_async().is_none());
 
-        dump_mir(tcx, false, "coroutine_before", &0, body, |_, _| Ok(()));
+        if let Some(dumper) = MirDumper::new(tcx, "coroutine_before", body) {
+            dumper.dump_mir(body);
+        }
 
         // The first argument is the coroutine type passed by value
         let coroutine_ty = body.local_decls.raw[1].ty;
@@ -1506,36 +1512,15 @@ impl<'tcx> crate::MirPass<'tcx> for StateTransform {
         ) {
             let context_mut_ref = transform_async_context(tcx, body);
             expand_async_drops(tcx, body, context_mut_ref, coroutine_kind, coroutine_ty);
-            dump_mir(tcx, false, "coroutine_async_drop_expand", &0, body, |_, _| Ok(()));
+
+            if let Some(dumper) = MirDumper::new(tcx, "coroutine_async_drop_expand", body) {
+                dumper.dump_mir(body);
+            }
         } else {
             cleanup_async_drops(body);
         }
 
-        // We also replace the resume argument and insert an `Assign`.
-        // This is needed because the resume argument `_2` might be live across a `yield`, in which
-        // case there is no `Assign` to it that the transform can turn into a store to the coroutine
-        // state. After the yield the slot in the coroutine state would then be uninitialized.
-        let resume_local = CTX_ARG;
-        let resume_ty = body.local_decls[resume_local].ty;
-        let old_resume_local = replace_local(resume_local, resume_ty, body, tcx);
-
-        // When first entering the coroutine, move the resume argument into its old local
-        // (which is now a generator interior).
-        let source_info = SourceInfo::outermost(body.span);
-        let stmts = &mut body.basic_blocks_mut()[START_BLOCK].statements;
-        stmts.insert(
-            0,
-            Statement::new(
-                source_info,
-                StatementKind::Assign(Box::new((
-                    old_resume_local.into(),
-                    Rvalue::Use(Operand::Move(resume_local.into())),
-                ))),
-            ),
-        );
-
         let always_live_locals = always_storage_live_locals(body);
-
         let movable = coroutine_kind.movability() == hir::Movability::Movable;
         let liveness_info =
             locals_live_across_suspend_points(tcx, body, &always_live_locals, movable);
@@ -1576,6 +1561,21 @@ impl<'tcx> crate::MirPass<'tcx> for StateTransform {
         };
         transform.visit_body(body);
 
+        // MIR parameters are not explicitly assigned-to when entering the MIR body.
+        // If we want to save their values inside the coroutine state, we need to do so explicitly.
+        let source_info = SourceInfo::outermost(body.span);
+        let args_iter = body.args_iter();
+        body.basic_blocks.as_mut()[START_BLOCK].statements.splice(
+            0..0,
+            args_iter.filter_map(|local| {
+                let (ty, variant_index, idx) = transform.remap[local]?;
+                let lhs = transform.make_field(variant_index, idx, ty);
+                let rhs = Rvalue::Use(Operand::Move(local.into()));
+                let assign = StatementKind::Assign(Box::new((lhs, rhs)));
+                Some(Statement::new(source_info, assign))
+            }),
+        );
+
         // Update our MIR struct to reflect the changes we've made
         body.arg_count = 2; // self, resume arg
         body.spread_arg = None;
@@ -1605,14 +1605,18 @@ impl<'tcx> crate::MirPass<'tcx> for StateTransform {
         // This is expanded to a drop ladder in `elaborate_coroutine_drops`.
         let drop_clean = insert_clean_drop(tcx, body, has_async_drops);
 
-        dump_mir(tcx, false, "coroutine_pre-elab", &0, body, |_, _| Ok(()));
+        if let Some(dumper) = MirDumper::new(tcx, "coroutine_pre-elab", body) {
+            dumper.dump_mir(body);
+        }
 
         // Expand `drop(coroutine_struct)` to a drop ladder which destroys upvars.
         // If any upvars are moved out of, drop elaboration will handle upvar destruction.
         // However we need to also elaborate the code generated by `insert_clean_drop`.
         elaborate_coroutine_drops(tcx, body);
 
-        dump_mir(tcx, false, "coroutine_post-transform", &0, body, |_, _| Ok(()));
+        if let Some(dumper) = MirDumper::new(tcx, "coroutine_post-transform", body) {
+            dumper.dump_mir(body);
+        }
 
         let can_unwind = can_unwind(tcx, body);
 
@@ -1880,7 +1884,7 @@ fn check_must_not_suspend_ty<'tcx>(
             }
             has_emitted
         }
-        ty::Dynamic(binder, _, _) => {
+        ty::Dynamic(binder, _) => {
             let mut has_emitted = false;
             for predicate in binder.iter() {
                 if let ty::ExistentialPredicate::Trait(ref trait_ref) = predicate.skip_binder() {
diff --git a/compiler/rustc_mir_transform/src/coroutine/by_move_body.rs b/compiler/rustc_mir_transform/src/coroutine/by_move_body.rs
index 81d7b7ba02c..951ff69c19e 100644
--- a/compiler/rustc_mir_transform/src/coroutine/by_move_body.rs
+++ b/compiler/rustc_mir_transform/src/coroutine/by_move_body.rs
@@ -77,7 +77,7 @@ use rustc_hir::definitions::DisambiguatorState;
 use rustc_middle::bug;
 use rustc_middle::hir::place::{Projection, ProjectionKind};
 use rustc_middle::mir::visit::MutVisitor;
-use rustc_middle::mir::{self, dump_mir};
+use rustc_middle::mir::{self, MirDumper};
 use rustc_middle::ty::{self, InstanceKind, Ty, TyCtxt, TypeVisitableExt};
 
 pub(crate) fn coroutine_by_move_body_def_id<'tcx>(
@@ -225,7 +225,10 @@ pub(crate) fn coroutine_by_move_body_def_id<'tcx>(
     );
     by_move_body.source =
         mir::MirSource::from_instance(InstanceKind::Item(body_def.def_id().to_def_id()));
-    dump_mir(tcx, false, "built", &"after", &by_move_body, |_, _| Ok(()));
+
+    if let Some(dumper) = MirDumper::new(tcx, "built", &by_move_body) {
+        dumper.set_disambiguator(&"after").dump_mir(&by_move_body);
+    }
 
     // Feed HIR because we try to access this body's attrs in the inliner.
     body_def.feed_hir();
diff --git a/compiler/rustc_mir_transform/src/coroutine/drop.rs b/compiler/rustc_mir_transform/src/coroutine/drop.rs
index 406575c4f43..fd2d8b2b056 100644
--- a/compiler/rustc_mir_transform/src/coroutine/drop.rs
+++ b/compiler/rustc_mir_transform/src/coroutine/drop.rs
@@ -23,10 +23,10 @@ impl<'tcx> MutVisitor<'tcx> for FixReturnPendingVisitor<'tcx> {
         }
 
         // Converting `_0 = Poll::<Rv>::Pending` to `_0 = Poll::<()>::Pending`
-        if let Rvalue::Aggregate(kind, _) = rvalue {
-            if let AggregateKind::Adt(_, _, ref mut args, _, _) = **kind {
-                *args = self.tcx.mk_args(&[self.tcx.types.unit.into()]);
-            }
+        if let Rvalue::Aggregate(kind, _) = rvalue
+            && let AggregateKind::Adt(_, _, ref mut args, _, _) = **kind
+        {
+            *args = self.tcx.mk_args(&[self.tcx.types.unit.into()]);
         }
     }
 }
@@ -605,7 +605,9 @@ pub(super) fn create_coroutine_drop_shim<'tcx>(
     // Temporary change MirSource to coroutine's instance so that dump_mir produces more sensible
     // filename.
     body.source.instance = coroutine_instance;
-    dump_mir(tcx, false, "coroutine_drop", &0, &body, |_, _| Ok(()));
+    if let Some(dumper) = MirDumper::new(tcx, "coroutine_drop", &body) {
+        dumper.dump_mir(&body);
+    }
     body.source.instance = drop_instance;
 
     // Creating a coroutine drop shim happens on `Analysis(PostCleanup) -> Runtime(Initial)`
@@ -696,7 +698,9 @@ pub(super) fn create_coroutine_drop_shim_async<'tcx>(
         None,
     );
 
-    dump_mir(tcx, false, "coroutine_drop_async", &0, &body, |_, _| Ok(()));
+    if let Some(dumper) = MirDumper::new(tcx, "coroutine_drop_async", &body) {
+        dumper.dump_mir(&body);
+    }
 
     body
 }
@@ -741,7 +745,9 @@ pub(super) fn create_coroutine_drop_shim_proxy_async<'tcx>(
     };
     body.basic_blocks_mut()[call_bb].terminator = Some(Terminator { source_info, kind });
 
-    dump_mir(tcx, false, "coroutine_drop_proxy_async", &0, &body, |_, _| Ok(()));
+    if let Some(dumper) = MirDumper::new(tcx, "coroutine_drop_proxy_async", &body) {
+        dumper.dump_mir(&body);
+    }
 
     body
 }
diff --git a/compiler/rustc_mir_transform/src/coverage/counters.rs b/compiler/rustc_mir_transform/src/coverage/counters.rs
index 5568d42ab8f..879a20e771d 100644
--- a/compiler/rustc_mir_transform/src/coverage/counters.rs
+++ b/compiler/rustc_mir_transform/src/coverage/counters.rs
@@ -16,7 +16,6 @@ use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};
 
 mod balanced_flow;
 pub(crate) mod node_flow;
-mod union_find;
 
 /// Struct containing the results of [`prepare_bcb_counters_data`].
 pub(crate) struct BcbCountersData {
diff --git a/compiler/rustc_mir_transform/src/coverage/counters/node_flow.rs b/compiler/rustc_mir_transform/src/coverage/counters/node_flow.rs
index 91ed54b8b59..e063f75887b 100644
--- a/compiler/rustc_mir_transform/src/coverage/counters/node_flow.rs
+++ b/compiler/rustc_mir_transform/src/coverage/counters/node_flow.rs
@@ -7,13 +7,12 @@
 //! (Knuth & Stevenson, 1973).
 
 use rustc_data_structures::graph;
+use rustc_data_structures::union_find::UnionFind;
 use rustc_index::bit_set::DenseBitSet;
 use rustc_index::{Idx, IndexSlice, IndexVec};
 pub(crate) use rustc_middle::mir::coverage::NodeFlowData;
 use rustc_middle::mir::coverage::Op;
 
-use crate::coverage::counters::union_find::UnionFind;
-
 #[cfg(test)]
 mod tests;
 
diff --git a/compiler/rustc_mir_transform/src/coverage/counters/union_find.rs b/compiler/rustc_mir_transform/src/coverage/counters/union_find.rs
deleted file mode 100644
index a826a953fa6..00000000000
--- a/compiler/rustc_mir_transform/src/coverage/counters/union_find.rs
+++ /dev/null
@@ -1,96 +0,0 @@
-use std::cmp::Ordering;
-use std::mem;
-
-use rustc_index::{Idx, IndexVec};
-
-#[cfg(test)]
-mod tests;
-
-/// Simple implementation of a union-find data structure, i.e. a disjoint-set
-/// forest.
-#[derive(Debug)]
-pub(crate) struct UnionFind<Key: Idx> {
-    table: IndexVec<Key, UnionFindEntry<Key>>,
-}
-
-#[derive(Debug)]
-struct UnionFindEntry<Key> {
-    /// Transitively points towards the "root" of the set containing this key.
-    ///
-    /// Invariant: A root key is its own parent.
-    parent: Key,
-    /// When merging two "root" keys, their ranks determine which key becomes
-    /// the new root, to prevent the parent tree from becoming unnecessarily
-    /// tall. See [`UnionFind::unify`] for details.
-    rank: u32,
-}
-
-impl<Key: Idx> UnionFind<Key> {
-    /// Creates a new disjoint-set forest containing the keys `0..num_keys`.
-    /// Initially, every key is part of its own one-element set.
-    pub(crate) fn new(num_keys: usize) -> Self {
-        // Initially, every key is the root of its own set, so its parent is itself.
-        Self { table: IndexVec::from_fn_n(|key| UnionFindEntry { parent: key, rank: 0 }, num_keys) }
-    }
-
-    /// Returns the "root" key of the disjoint-set containing the given key.
-    /// If two keys have the same root, they belong to the same set.
-    ///
-    /// Also updates internal data structures to make subsequent `find`
-    /// operations faster.
-    pub(crate) fn find(&mut self, key: Key) -> Key {
-        // Loop until we find a key that is its own parent.
-        let mut curr = key;
-        while let parent = self.table[curr].parent
-            && curr != parent
-        {
-            // Perform "path compression" by peeking one layer ahead, and
-            // setting the current key's parent to that value.
-            // (This works even when `parent` is the root of its set, because
-            // of the invariant that a root is its own parent.)
-            let parent_parent = self.table[parent].parent;
-            self.table[curr].parent = parent_parent;
-
-            // Advance by one step and continue.
-            curr = parent;
-        }
-        curr
-    }
-
-    /// Merges the set containing `a` and the set containing `b` into one set.
-    ///
-    /// Returns the common root of both keys, after the merge.
-    pub(crate) fn unify(&mut self, a: Key, b: Key) -> Key {
-        let mut a = self.find(a);
-        let mut b = self.find(b);
-
-        // If both keys have the same root, they're already in the same set,
-        // so there's nothing more to do.
-        if a == b {
-            return a;
-        };
-
-        // Ensure that `a` has strictly greater rank, swapping if necessary.
-        // If both keys have the same rank, increment the rank of `a` so that
-        // future unifications will also prefer `a`, leading to flatter trees.
-        match Ord::cmp(&self.table[a].rank, &self.table[b].rank) {
-            Ordering::Less => mem::swap(&mut a, &mut b),
-            Ordering::Equal => self.table[a].rank += 1,
-            Ordering::Greater => {}
-        }
-
-        debug_assert!(self.table[a].rank > self.table[b].rank);
-        debug_assert_eq!(self.table[b].parent, b);
-
-        // Make `a` the parent of `b`.
-        self.table[b].parent = a;
-
-        a
-    }
-
-    /// Takes a "snapshot" of the current state of this disjoint-set forest, in
-    /// the form of a vector that directly maps each key to its current root.
-    pub(crate) fn snapshot(&mut self) -> IndexVec<Key, Key> {
-        self.table.indices().map(|key| self.find(key)).collect()
-    }
-}
diff --git a/compiler/rustc_mir_transform/src/coverage/counters/union_find/tests.rs b/compiler/rustc_mir_transform/src/coverage/counters/union_find/tests.rs
deleted file mode 100644
index 34a4e4f8e6e..00000000000
--- a/compiler/rustc_mir_transform/src/coverage/counters/union_find/tests.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-use super::UnionFind;
-
-#[test]
-fn empty() {
-    let mut sets = UnionFind::<u32>::new(10);
-
-    for i in 1..10 {
-        assert_eq!(sets.find(i), i);
-    }
-}
-
-#[test]
-fn transitive() {
-    let mut sets = UnionFind::<u32>::new(10);
-
-    sets.unify(3, 7);
-    sets.unify(4, 2);
-
-    assert_eq!(sets.find(7), sets.find(3));
-    assert_eq!(sets.find(2), sets.find(4));
-    assert_ne!(sets.find(3), sets.find(4));
-
-    sets.unify(7, 4);
-
-    assert_eq!(sets.find(7), sets.find(3));
-    assert_eq!(sets.find(2), sets.find(4));
-    assert_eq!(sets.find(3), sets.find(4));
-
-    for i in [0, 1, 5, 6, 8, 9] {
-        assert_eq!(sets.find(i), i);
-    }
-}
diff --git a/compiler/rustc_mir_transform/src/coverage/expansion.rs b/compiler/rustc_mir_transform/src/coverage/expansion.rs
new file mode 100644
index 00000000000..851bbaeed48
--- /dev/null
+++ b/compiler/rustc_mir_transform/src/coverage/expansion.rs
@@ -0,0 +1,127 @@
+use rustc_data_structures::fx::{FxIndexMap, FxIndexSet, IndexEntry};
+use rustc_middle::mir::coverage::BasicCoverageBlock;
+use rustc_span::{ExpnId, ExpnKind, Span};
+
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct SpanWithBcb {
+    pub(crate) span: Span,
+    pub(crate) bcb: BasicCoverageBlock,
+}
+
+#[derive(Debug)]
+pub(crate) struct ExpnTree {
+    nodes: FxIndexMap<ExpnId, ExpnNode>,
+}
+
+impl ExpnTree {
+    pub(crate) fn get(&self, expn_id: ExpnId) -> Option<&ExpnNode> {
+        self.nodes.get(&expn_id)
+    }
+
+    /// Yields the tree node for the given expansion ID (if present), followed
+    /// by the nodes of all of its descendants in depth-first order.
+    pub(crate) fn iter_node_and_descendants(
+        &self,
+        root_expn_id: ExpnId,
+    ) -> impl Iterator<Item = &ExpnNode> {
+        gen move {
+            let Some(root_node) = self.get(root_expn_id) else { return };
+            yield root_node;
+
+            // Stack of child-node-ID iterators that drives the depth-first traversal.
+            let mut iter_stack = vec![root_node.child_expn_ids.iter()];
+
+            while let Some(curr_iter) = iter_stack.last_mut() {
+                // Pull the next ID from the top of the stack.
+                let Some(&curr_id) = curr_iter.next() else {
+                    iter_stack.pop();
+                    continue;
+                };
+
+                // Yield this node.
+                let Some(node) = self.get(curr_id) else { continue };
+                yield node;
+
+                // Push the node's children, to be traversed next.
+                if !node.child_expn_ids.is_empty() {
+                    iter_stack.push(node.child_expn_ids.iter());
+                }
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct ExpnNode {
+    /// Storing the expansion ID in its own node is not strictly necessary,
+    /// but is helpful for debugging and might be useful later.
+    #[expect(dead_code)]
+    pub(crate) expn_id: ExpnId,
+
+    // Useful info extracted from `ExpnData`.
+    pub(crate) expn_kind: ExpnKind,
+    /// Non-dummy `ExpnData::call_site` span.
+    pub(crate) call_site: Option<Span>,
+    /// Expansion ID of `call_site`, if present.
+    /// This links an expansion node to its parent in the tree.
+    pub(crate) call_site_expn_id: Option<ExpnId>,
+
+    /// Spans (and their associated BCBs) belonging to this expansion.
+    pub(crate) spans: Vec<SpanWithBcb>,
+    /// Expansions whose call-site is in this expansion.
+    pub(crate) child_expn_ids: FxIndexSet<ExpnId>,
+}
+
+impl ExpnNode {
+    fn new(expn_id: ExpnId) -> Self {
+        let expn_data = expn_id.expn_data();
+
+        let call_site = Some(expn_data.call_site).filter(|sp| !sp.is_dummy());
+        let call_site_expn_id = try { call_site?.ctxt().outer_expn() };
+
+        Self {
+            expn_id,
+
+            expn_kind: expn_data.kind,
+            call_site,
+            call_site_expn_id,
+
+            spans: vec![],
+            child_expn_ids: FxIndexSet::default(),
+        }
+    }
+}
+
+/// Given a collection of span/BCB pairs from potentially-different syntax contexts,
+/// arranges them into an "expansion tree" based on their expansion call-sites.
+pub(crate) fn build_expn_tree(spans: impl IntoIterator<Item = SpanWithBcb>) -> ExpnTree {
+    let mut nodes = FxIndexMap::default();
+    let new_node = |&expn_id: &ExpnId| ExpnNode::new(expn_id);
+
+    for span_with_bcb in spans {
+        // Create a node for this span's enclosing expansion, and add the span to it.
+        let expn_id = span_with_bcb.span.ctxt().outer_expn();
+        let node = nodes.entry(expn_id).or_insert_with_key(new_node);
+        node.spans.push(span_with_bcb);
+
+        // Now walk up the expansion call-site chain, creating nodes and registering children.
+        let mut prev = expn_id;
+        let mut curr_expn_id = node.call_site_expn_id;
+        while let Some(expn_id) = curr_expn_id {
+            let entry = nodes.entry(expn_id);
+            let node_existed = matches!(entry, IndexEntry::Occupied(_));
+
+            let node = entry.or_insert_with_key(new_node);
+            node.child_expn_ids.insert(prev);
+
+            if node_existed {
+                break;
+            }
+
+            prev = expn_id;
+            curr_expn_id = node.call_site_expn_id;
+        }
+    }
+
+    ExpnTree { nodes }
+}
diff --git a/compiler/rustc_mir_transform/src/coverage/hir_info.rs b/compiler/rustc_mir_transform/src/coverage/hir_info.rs
new file mode 100644
index 00000000000..28fdc52b06c
--- /dev/null
+++ b/compiler/rustc_mir_transform/src/coverage/hir_info.rs
@@ -0,0 +1,128 @@
+use rustc_hir as hir;
+use rustc_hir::intravisit::{Visitor, walk_expr};
+use rustc_middle::hir::nested_filter;
+use rustc_middle::ty::TyCtxt;
+use rustc_span::Span;
+use rustc_span::def_id::LocalDefId;
+
+/// Function information extracted from HIR by the coverage instrumentor.
+#[derive(Debug)]
+pub(crate) struct ExtractedHirInfo {
+    pub(crate) function_source_hash: u64,
+    pub(crate) is_async_fn: bool,
+    /// The span of the function's signature, if available.
+    /// Must have the same context and filename as the body span.
+    pub(crate) fn_sig_span: Option<Span>,
+    pub(crate) body_span: Span,
+    /// "Holes" are regions within the function body (or its expansions) that
+    /// should not be included in coverage spans for this function
+    /// (e.g. closures and nested items).
+    pub(crate) hole_spans: Vec<Span>,
+}
+
+pub(crate) fn extract_hir_info<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId) -> ExtractedHirInfo {
+    // FIXME(#79625): Consider improving MIR to provide the information needed, to avoid going back
+    // to HIR for it.
+
+    // HACK: For synthetic MIR bodies (async closures), use the def id of the HIR body.
+    if tcx.is_synthetic_mir(def_id) {
+        return extract_hir_info(tcx, tcx.local_parent(def_id));
+    }
+
+    let hir_node = tcx.hir_node_by_def_id(def_id);
+    let fn_body_id = hir_node.body_id().expect("HIR node is a function with body");
+    let hir_body = tcx.hir_body(fn_body_id);
+
+    let maybe_fn_sig = hir_node.fn_sig();
+    let is_async_fn = maybe_fn_sig.is_some_and(|fn_sig| fn_sig.header.is_async());
+
+    let mut body_span = hir_body.value.span;
+
+    use hir::{Closure, Expr, ExprKind, Node};
+    // Unexpand a closure's body span back to the context of its declaration.
+    // This helps with closure bodies that consist of just a single bang-macro,
+    // and also with closure bodies produced by async desugaring.
+    if let Node::Expr(&Expr { kind: ExprKind::Closure(&Closure { fn_decl_span, .. }), .. }) =
+        hir_node
+    {
+        body_span = body_span.find_ancestor_in_same_ctxt(fn_decl_span).unwrap_or(body_span);
+    }
+
+    // The actual signature span is only used if it has the same context and
+    // filename as the body, and precedes the body.
+    let fn_sig_span = maybe_fn_sig.map(|fn_sig| fn_sig.span).filter(|&fn_sig_span| {
+        let source_map = tcx.sess.source_map();
+        let file_idx = |span: Span| source_map.lookup_source_file_idx(span.lo());
+
+        fn_sig_span.eq_ctxt(body_span)
+            && fn_sig_span.hi() <= body_span.lo()
+            && file_idx(fn_sig_span) == file_idx(body_span)
+    });
+
+    let function_source_hash = hash_mir_source(tcx, hir_body);
+
+    let hole_spans = extract_hole_spans_from_hir(tcx, hir_body);
+
+    ExtractedHirInfo { function_source_hash, is_async_fn, fn_sig_span, body_span, hole_spans }
+}
+
+fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &'tcx hir::Body<'tcx>) -> u64 {
+    let owner = hir_body.id().hir_id.owner;
+    tcx.hir_owner_nodes(owner)
+        .opt_hash_including_bodies
+        .expect("hash should be present when coverage instrumentation is enabled")
+        .to_smaller_hash()
+        .as_u64()
+}
+
+fn extract_hole_spans_from_hir<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &hir::Body<'tcx>) -> Vec<Span> {
+    struct HolesVisitor<'tcx> {
+        tcx: TyCtxt<'tcx>,
+        hole_spans: Vec<Span>,
+    }
+
+    impl<'tcx> Visitor<'tcx> for HolesVisitor<'tcx> {
+        /// We have special handling for nested items, but we still want to
+        /// traverse into nested bodies of things that are not considered items,
+        /// such as "anon consts" (e.g. array lengths).
+        type NestedFilter = nested_filter::OnlyBodies;
+
+        fn maybe_tcx(&mut self) -> TyCtxt<'tcx> {
+            self.tcx
+        }
+
+        /// We override `visit_nested_item` instead of `visit_item` because we
+        /// only need the item's span, not the item itself.
+        fn visit_nested_item(&mut self, id: hir::ItemId) -> Self::Result {
+            let span = self.tcx.def_span(id.owner_id.def_id);
+            self.visit_hole_span(span);
+            // Having visited this item, we don't care about its children,
+            // so don't call `walk_item`.
+        }
+
+        // We override `visit_expr` instead of the more specific expression
+        // visitors, so that we have direct access to the expression span.
+        fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) {
+            match expr.kind {
+                hir::ExprKind::Closure(_) | hir::ExprKind::ConstBlock(_) => {
+                    self.visit_hole_span(expr.span);
+                    // Having visited this expression, we don't care about its
+                    // children, so don't call `walk_expr`.
+                }
+
+                // For other expressions, recursively visit as normal.
+                _ => walk_expr(self, expr),
+            }
+        }
+    }
+    impl HolesVisitor<'_> {
+        fn visit_hole_span(&mut self, hole_span: Span) {
+            self.hole_spans.push(hole_span);
+        }
+    }
+
+    let mut visitor = HolesVisitor { tcx, hole_spans: vec![] };
+
+    visitor.visit_body(hir_body);
+    visitor.hole_spans
+}
diff --git a/compiler/rustc_mir_transform/src/coverage/mappings.rs b/compiler/rustc_mir_transform/src/coverage/mappings.rs
index b4b4d0416fb..8dbe564f517 100644
--- a/compiler/rustc_mir_transform/src/coverage/mappings.rs
+++ b/compiler/rustc_mir_transform/src/coverage/mappings.rs
@@ -1,123 +1,36 @@
-use std::collections::BTreeSet;
-
-use rustc_data_structures::fx::FxIndexMap;
 use rustc_index::IndexVec;
 use rustc_middle::mir::coverage::{
-    BlockMarkerId, BranchSpan, ConditionId, ConditionInfo, CoverageInfoHi, CoverageKind,
+    BlockMarkerId, BranchSpan, CoverageInfoHi, CoverageKind, Mapping, MappingKind,
 };
 use rustc_middle::mir::{self, BasicBlock, StatementKind};
 use rustc_middle::ty::TyCtxt;
-use rustc_span::Span;
 
-use crate::coverage::ExtractedHirInfo;
-use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph, START_BCB};
+use crate::coverage::graph::CoverageGraph;
+use crate::coverage::hir_info::ExtractedHirInfo;
 use crate::coverage::spans::extract_refined_covspans;
 use crate::coverage::unexpand::unexpand_into_body_span;
-use crate::errors::MCDCExceedsTestVectorLimit;
-
-/// Associates an ordinary executable code span with its corresponding BCB.
-#[derive(Debug)]
-pub(super) struct CodeMapping {
-    pub(super) span: Span,
-    pub(super) bcb: BasicCoverageBlock,
-}
-
-/// This is separate from [`MCDCBranch`] to help prepare for larger changes
-/// that will be needed for improved branch coverage in the future.
-/// (See <https://github.com/rust-lang/rust/pull/124217>.)
-#[derive(Debug)]
-pub(super) struct BranchPair {
-    pub(super) span: Span,
-    pub(super) true_bcb: BasicCoverageBlock,
-    pub(super) false_bcb: BasicCoverageBlock,
-}
-
-/// Associates an MC/DC branch span with condition info besides fields for normal branch.
-#[derive(Debug)]
-pub(super) struct MCDCBranch {
-    pub(super) span: Span,
-    pub(super) true_bcb: BasicCoverageBlock,
-    pub(super) false_bcb: BasicCoverageBlock,
-    pub(super) condition_info: ConditionInfo,
-    // Offset added to test vector idx if this branch is evaluated to true.
-    pub(super) true_index: usize,
-    // Offset added to test vector idx if this branch is evaluated to false.
-    pub(super) false_index: usize,
-}
-
-/// Associates an MC/DC decision with its join BCBs.
-#[derive(Debug)]
-pub(super) struct MCDCDecision {
-    pub(super) span: Span,
-    pub(super) end_bcbs: BTreeSet<BasicCoverageBlock>,
-    pub(super) bitmap_idx: usize,
-    pub(super) num_test_vectors: usize,
-    pub(super) decision_depth: u16,
-}
-
-// LLVM uses `i32` to index the bitmap. Thus `i32::MAX` is the hard limit for number of all test vectors
-// in a function.
-const MCDC_MAX_BITMAP_SIZE: usize = i32::MAX as usize;
 
 #[derive(Default)]
-pub(super) struct ExtractedMappings {
-    pub(super) code_mappings: Vec<CodeMapping>,
-    pub(super) branch_pairs: Vec<BranchPair>,
-    pub(super) mcdc_bitmap_bits: usize,
-    pub(super) mcdc_degraded_branches: Vec<MCDCBranch>,
-    pub(super) mcdc_mappings: Vec<(MCDCDecision, Vec<MCDCBranch>)>,
+pub(crate) struct ExtractedMappings {
+    pub(crate) mappings: Vec<Mapping>,
 }
 
-/// Extracts coverage-relevant spans from MIR, and associates them with
-/// their corresponding BCBs.
-pub(super) fn extract_all_mapping_info_from_mir<'tcx>(
+/// Extracts coverage-relevant spans from MIR, and uses them to create
+/// coverage mapping data for inclusion in MIR.
+pub(crate) fn extract_mappings_from_mir<'tcx>(
     tcx: TyCtxt<'tcx>,
     mir_body: &mir::Body<'tcx>,
     hir_info: &ExtractedHirInfo,
     graph: &CoverageGraph,
 ) -> ExtractedMappings {
-    let mut code_mappings = vec![];
-    let mut branch_pairs = vec![];
-    let mut mcdc_bitmap_bits = 0;
-    let mut mcdc_degraded_branches = vec![];
-    let mut mcdc_mappings = vec![];
+    let mut mappings = vec![];
 
-    if hir_info.is_async_fn || tcx.sess.coverage_no_mir_spans() {
-        // An async function desugars into a function that returns a future,
-        // with the user code wrapped in a closure. Any spans in the desugared
-        // outer function will be unhelpful, so just keep the signature span
-        // and ignore all of the spans in the MIR body.
-        //
-        // When debugging flag `-Zcoverage-options=no-mir-spans` is set, we need
-        // to give the same treatment to _all_ functions, because `llvm-cov`
-        // seems to ignore functions that don't have any ordinary code spans.
-        if let Some(span) = hir_info.fn_sig_span {
-            code_mappings.push(CodeMapping { span, bcb: START_BCB });
-        }
-    } else {
-        // Extract coverage spans from MIR statements/terminators as normal.
-        extract_refined_covspans(tcx, mir_body, hir_info, graph, &mut code_mappings);
-    }
+    // Extract ordinary code mappings from MIR statement/terminator spans.
+    extract_refined_covspans(tcx, mir_body, hir_info, graph, &mut mappings);
 
-    branch_pairs.extend(extract_branch_pairs(mir_body, hir_info, graph));
+    extract_branch_mappings(mir_body, hir_info, graph, &mut mappings);
 
-    extract_mcdc_mappings(
-        mir_body,
-        tcx,
-        hir_info.body_span,
-        graph,
-        &mut mcdc_bitmap_bits,
-        &mut mcdc_degraded_branches,
-        &mut mcdc_mappings,
-    );
-
-    ExtractedMappings {
-        code_mappings,
-        branch_pairs,
-        mcdc_bitmap_bits,
-        mcdc_degraded_branches,
-        mcdc_mappings,
-    }
+    ExtractedMappings { mappings }
 }
 
 fn resolve_block_markers(
@@ -141,25 +54,18 @@ fn resolve_block_markers(
     block_markers
 }
 
-// FIXME: There is currently a lot of redundancy between
-// `extract_branch_pairs` and `extract_mcdc_mappings`. This is needed so
-// that they can each be modified without interfering with the other, but in
-// the long term we should try to bring them together again when branch coverage
-// and MC/DC coverage support are more mature.
-
-pub(super) fn extract_branch_pairs(
+pub(super) fn extract_branch_mappings(
     mir_body: &mir::Body<'_>,
     hir_info: &ExtractedHirInfo,
     graph: &CoverageGraph,
-) -> Vec<BranchPair> {
-    let Some(coverage_info_hi) = mir_body.coverage_info_hi.as_deref() else { return vec![] };
+    mappings: &mut Vec<Mapping>,
+) {
+    let Some(coverage_info_hi) = mir_body.coverage_info_hi.as_deref() else { return };
 
     let block_markers = resolve_block_markers(coverage_info_hi, mir_body);
 
-    coverage_info_hi
-        .branch_spans
-        .iter()
-        .filter_map(|&BranchSpan { span: raw_span, true_marker, false_marker }| {
+    mappings.extend(coverage_info_hi.branch_spans.iter().filter_map(
+        |&BranchSpan { span: raw_span, true_marker, false_marker }| try {
             // For now, ignore any branch span that was introduced by
             // expansion. This makes things like assert macros less noisy.
             if !raw_span.ctxt().outer_expn_data().is_root() {
@@ -172,179 +78,7 @@ pub(super) fn extract_branch_pairs(
             let true_bcb = bcb_from_marker(true_marker)?;
             let false_bcb = bcb_from_marker(false_marker)?;
 
-            Some(BranchPair { span, true_bcb, false_bcb })
-        })
-        .collect::<Vec<_>>()
-}
-
-pub(super) fn extract_mcdc_mappings(
-    mir_body: &mir::Body<'_>,
-    tcx: TyCtxt<'_>,
-    body_span: Span,
-    graph: &CoverageGraph,
-    mcdc_bitmap_bits: &mut usize,
-    mcdc_degraded_branches: &mut impl Extend<MCDCBranch>,
-    mcdc_mappings: &mut impl Extend<(MCDCDecision, Vec<MCDCBranch>)>,
-) {
-    let Some(coverage_info_hi) = mir_body.coverage_info_hi.as_deref() else { return };
-
-    let block_markers = resolve_block_markers(coverage_info_hi, mir_body);
-
-    let bcb_from_marker = |marker: BlockMarkerId| graph.bcb_from_bb(block_markers[marker]?);
-
-    let check_branch_bcb =
-        |raw_span: Span, true_marker: BlockMarkerId, false_marker: BlockMarkerId| {
-            // For now, ignore any branch span that was introduced by
-            // expansion. This makes things like assert macros less noisy.
-            if !raw_span.ctxt().outer_expn_data().is_root() {
-                return None;
-            }
-            let span = unexpand_into_body_span(raw_span, body_span)?;
-
-            let true_bcb = bcb_from_marker(true_marker)?;
-            let false_bcb = bcb_from_marker(false_marker)?;
-            Some((span, true_bcb, false_bcb))
-        };
-
-    let to_mcdc_branch = |&mir::coverage::MCDCBranchSpan {
-                              span: raw_span,
-                              condition_info,
-                              true_marker,
-                              false_marker,
-                          }| {
-        let (span, true_bcb, false_bcb) = check_branch_bcb(raw_span, true_marker, false_marker)?;
-        Some(MCDCBranch {
-            span,
-            true_bcb,
-            false_bcb,
-            condition_info,
-            true_index: usize::MAX,
-            false_index: usize::MAX,
-        })
-    };
-
-    let mut get_bitmap_idx = |num_test_vectors: usize| -> Option<usize> {
-        let bitmap_idx = *mcdc_bitmap_bits;
-        let next_bitmap_bits = bitmap_idx.saturating_add(num_test_vectors);
-        (next_bitmap_bits <= MCDC_MAX_BITMAP_SIZE).then(|| {
-            *mcdc_bitmap_bits = next_bitmap_bits;
-            bitmap_idx
-        })
-    };
-    mcdc_degraded_branches
-        .extend(coverage_info_hi.mcdc_degraded_branch_spans.iter().filter_map(to_mcdc_branch));
-
-    mcdc_mappings.extend(coverage_info_hi.mcdc_spans.iter().filter_map(|(decision, branches)| {
-        if branches.len() == 0 {
-            return None;
-        }
-        let decision_span = unexpand_into_body_span(decision.span, body_span)?;
-
-        let end_bcbs = decision
-            .end_markers
-            .iter()
-            .map(|&marker| bcb_from_marker(marker))
-            .collect::<Option<_>>()?;
-        let mut branch_mappings: Vec<_> = branches.into_iter().filter_map(to_mcdc_branch).collect();
-        if branch_mappings.len() != branches.len() {
-            mcdc_degraded_branches.extend(branch_mappings);
-            return None;
-        }
-        let num_test_vectors = calc_test_vectors_index(&mut branch_mappings);
-        let Some(bitmap_idx) = get_bitmap_idx(num_test_vectors) else {
-            tcx.dcx().emit_warn(MCDCExceedsTestVectorLimit {
-                span: decision_span,
-                max_num_test_vectors: MCDC_MAX_BITMAP_SIZE,
-            });
-            mcdc_degraded_branches.extend(branch_mappings);
-            return None;
-        };
-        // LLVM requires span of the decision contains all spans of its conditions.
-        // Usually the decision span meets the requirement well but in cases like macros it may not.
-        let span = branch_mappings
-            .iter()
-            .map(|branch| branch.span)
-            .reduce(|lhs, rhs| lhs.to(rhs))
-            .map(
-                |joint_span| {
-                    if decision_span.contains(joint_span) { decision_span } else { joint_span }
-                },
-            )
-            .expect("branch mappings are ensured to be non-empty as checked above");
-        Some((
-            MCDCDecision {
-                span,
-                end_bcbs,
-                bitmap_idx,
-                num_test_vectors,
-                decision_depth: decision.decision_depth,
-            },
-            branch_mappings,
-        ))
-    }));
-}
-
-// LLVM checks the executed test vector by accumulating indices of tested branches.
-// We calculate number of all possible test vectors of the decision and assign indices
-// to branches here.
-// See [the rfc](https://discourse.llvm.org/t/rfc-coverage-new-algorithm-and-file-format-for-mc-dc/76798/)
-// for more details about the algorithm.
-// This function is mostly like [`TVIdxBuilder::TvIdxBuilder`](https://github.com/llvm/llvm-project/blob/d594d9f7f4dc6eb748b3261917db689fdc348b96/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp#L226)
-fn calc_test_vectors_index(conditions: &mut Vec<MCDCBranch>) -> usize {
-    let mut indegree_stats = IndexVec::<ConditionId, usize>::from_elem_n(0, conditions.len());
-    // `num_paths` is `width` described at the llvm rfc, which indicates how many paths reaching the condition node.
-    let mut num_paths_stats = IndexVec::<ConditionId, usize>::from_elem_n(0, conditions.len());
-    let mut next_conditions = conditions
-        .iter_mut()
-        .map(|branch| {
-            let ConditionInfo { condition_id, true_next_id, false_next_id } = branch.condition_info;
-            [true_next_id, false_next_id]
-                .into_iter()
-                .flatten()
-                .for_each(|next_id| indegree_stats[next_id] += 1);
-            (condition_id, branch)
-        })
-        .collect::<FxIndexMap<_, _>>();
-
-    let mut queue =
-        std::collections::VecDeque::from_iter(next_conditions.swap_remove(&ConditionId::START));
-    num_paths_stats[ConditionId::START] = 1;
-    let mut decision_end_nodes = Vec::new();
-    while let Some(branch) = queue.pop_front() {
-        let ConditionInfo { condition_id, true_next_id, false_next_id } = branch.condition_info;
-        let (false_index, true_index) = (&mut branch.false_index, &mut branch.true_index);
-        let this_paths_count = num_paths_stats[condition_id];
-        // Note. First check the false next to ensure conditions are touched in same order with llvm-cov.
-        for (next, index) in [(false_next_id, false_index), (true_next_id, true_index)] {
-            if let Some(next_id) = next {
-                let next_paths_count = &mut num_paths_stats[next_id];
-                *index = *next_paths_count;
-                *next_paths_count = next_paths_count.saturating_add(this_paths_count);
-                let next_indegree = &mut indegree_stats[next_id];
-                *next_indegree -= 1;
-                if *next_indegree == 0 {
-                    queue.push_back(next_conditions.swap_remove(&next_id).expect(
-                        "conditions with non-zero indegree before must be in next_conditions",
-                    ));
-                }
-            } else {
-                decision_end_nodes.push((this_paths_count, condition_id, index));
-            }
-        }
-    }
-    assert!(next_conditions.is_empty(), "the decision tree has untouched nodes");
-    let mut cur_idx = 0;
-    // LLVM hopes the end nodes are sorted in descending order by `num_paths` so that it can
-    // optimize bitmap size for decisions in tree form such as `a && b && c && d && ...`.
-    decision_end_nodes.sort_by_key(|(num_paths, _, _)| usize::MAX - *num_paths);
-    for (num_paths, condition_id, index) in decision_end_nodes {
-        assert_eq!(
-            num_paths, num_paths_stats[condition_id],
-            "end nodes should not be updated since they were visited"
-        );
-        assert_eq!(*index, usize::MAX, "end nodes should not be assigned index before");
-        *index = cur_idx;
-        cur_idx += num_paths;
-    }
-    cur_idx
+            Mapping { span, kind: MappingKind::Branch { true_bcb, false_bcb } }
+        },
+    ));
 }
diff --git a/compiler/rustc_mir_transform/src/coverage/mod.rs b/compiler/rustc_mir_transform/src/coverage/mod.rs
index f253d1662ca..08c7d346009 100644
--- a/compiler/rustc_mir_transform/src/coverage/mod.rs
+++ b/compiler/rustc_mir_transform/src/coverage/mod.rs
@@ -1,5 +1,16 @@
+use rustc_middle::mir::coverage::{CoverageKind, FunctionCoverageInfo};
+use rustc_middle::mir::{self, BasicBlock, Statement, StatementKind, TerminatorKind};
+use rustc_middle::ty::TyCtxt;
+use tracing::{debug, debug_span, trace};
+
+use crate::coverage::counters::BcbCountersData;
+use crate::coverage::graph::CoverageGraph;
+use crate::coverage::mappings::ExtractedMappings;
+
 mod counters;
+mod expansion;
 mod graph;
+mod hir_info;
 mod mappings;
 pub(super) mod query;
 mod spans;
@@ -7,22 +18,6 @@ mod spans;
 mod tests;
 mod unexpand;
 
-use rustc_hir as hir;
-use rustc_hir::intravisit::{Visitor, walk_expr};
-use rustc_middle::hir::nested_filter;
-use rustc_middle::mir::coverage::{
-    CoverageKind, DecisionInfo, FunctionCoverageInfo, Mapping, MappingKind,
-};
-use rustc_middle::mir::{self, BasicBlock, Statement, StatementKind, TerminatorKind};
-use rustc_middle::ty::TyCtxt;
-use rustc_span::Span;
-use rustc_span::def_id::LocalDefId;
-use tracing::{debug, debug_span, trace};
-
-use crate::coverage::counters::BcbCountersData;
-use crate::coverage::graph::CoverageGraph;
-use crate::coverage::mappings::ExtractedMappings;
-
 /// Inserts `StatementKind::Coverage` statements that either instrument the binary with injected
 /// counters, via intrinsic `llvm.instrprof.increment`, and/or inject metadata used during codegen
 /// to construct the coverage map.
@@ -69,7 +64,7 @@ fn instrument_function_for_coverage<'tcx>(tcx: TyCtxt<'tcx>, mir_body: &mut mir:
     let def_id = mir_body.source.def_id();
     let _span = debug_span!("instrument_function_for_coverage", ?def_id).entered();
 
-    let hir_info = extract_hir_info(tcx, def_id.expect_local());
+    let hir_info = hir_info::extract_hir_info(tcx, def_id.expect_local());
 
     // Build the coverage graph, which is a simplified view of the MIR control-flow
     // graph that ignores some details not relevant to coverage instrumentation.
@@ -77,10 +72,8 @@ fn instrument_function_for_coverage<'tcx>(tcx: TyCtxt<'tcx>, mir_body: &mut mir:
 
     ////////////////////////////////////////////////////
     // Extract coverage spans and other mapping info from MIR.
-    let extracted_mappings =
-        mappings::extract_all_mapping_info_from_mir(tcx, mir_body, &hir_info, &graph);
-
-    let mappings = create_mappings(&extracted_mappings);
+    let ExtractedMappings { mappings } =
+        mappings::extract_mappings_from_mir(tcx, mir_body, &hir_info, &graph);
     if mappings.is_empty() {
         // No spans could be converted into valid mappings, so skip this function.
         debug!("no spans could be converted into valid mappings; skipping");
@@ -95,14 +88,6 @@ fn instrument_function_for_coverage<'tcx>(tcx: TyCtxt<'tcx>, mir_body: &mut mir:
 
     // Inject coverage statements into MIR.
     inject_coverage_statements(mir_body, &graph);
-    inject_mcdc_statements(mir_body, &graph, &extracted_mappings);
-
-    let mcdc_num_condition_bitmaps = extracted_mappings
-        .mcdc_mappings
-        .iter()
-        .map(|&(mappings::MCDCDecision { decision_depth, .. }, _)| decision_depth)
-        .max()
-        .map_or(0, |max| usize::from(max) + 1);
 
     mir_body.function_coverage_info = Some(Box::new(FunctionCoverageInfo {
         function_source_hash: hir_info.function_source_hash,
@@ -111,97 +96,9 @@ fn instrument_function_for_coverage<'tcx>(tcx: TyCtxt<'tcx>, mir_body: &mut mir:
         priority_list,
 
         mappings,
-
-        mcdc_bitmap_bits: extracted_mappings.mcdc_bitmap_bits,
-        mcdc_num_condition_bitmaps,
     }));
 }
 
-/// For each coverage span extracted from MIR, create a corresponding mapping.
-///
-/// FIXME(Zalathar): This used to be where BCBs in the extracted mappings were
-/// resolved to a `CovTerm`. But that is now handled elsewhere, so this
-/// function can potentially be simplified even further.
-fn create_mappings(extracted_mappings: &ExtractedMappings) -> Vec<Mapping> {
-    // Fully destructure the mappings struct to make sure we don't miss any kinds.
-    let ExtractedMappings {
-        code_mappings,
-        branch_pairs,
-        mcdc_bitmap_bits: _,
-        mcdc_degraded_branches,
-        mcdc_mappings,
-    } = extracted_mappings;
-    let mut mappings = Vec::new();
-
-    mappings.extend(code_mappings.iter().map(
-        // Ordinary code mappings are the simplest kind.
-        |&mappings::CodeMapping { span, bcb }| {
-            let kind = MappingKind::Code { bcb };
-            Mapping { kind, span }
-        },
-    ));
-
-    mappings.extend(branch_pairs.iter().map(
-        |&mappings::BranchPair { span, true_bcb, false_bcb }| {
-            let kind = MappingKind::Branch { true_bcb, false_bcb };
-            Mapping { kind, span }
-        },
-    ));
-
-    // MCDC branch mappings are appended with their decisions in case decisions were ignored.
-    mappings.extend(mcdc_degraded_branches.iter().map(
-        |&mappings::MCDCBranch {
-             span,
-             true_bcb,
-             false_bcb,
-             condition_info: _,
-             true_index: _,
-             false_index: _,
-         }| { Mapping { kind: MappingKind::Branch { true_bcb, false_bcb }, span } },
-    ));
-
-    for (decision, branches) in mcdc_mappings {
-        // FIXME(#134497): Previously it was possible for some of these branch
-        // conversions to fail, in which case the remaining branches in the
-        // decision would be degraded to plain `MappingKind::Branch`.
-        // The changes in #134497 made that failure impossible, because the
-        // fallible step was deferred to codegen. But the corresponding code
-        // in codegen wasn't updated to detect the need for a degrade step.
-        let conditions = branches
-            .into_iter()
-            .map(
-                |&mappings::MCDCBranch {
-                     span,
-                     true_bcb,
-                     false_bcb,
-                     condition_info,
-                     true_index: _,
-                     false_index: _,
-                 }| {
-                    Mapping {
-                        kind: MappingKind::MCDCBranch {
-                            true_bcb,
-                            false_bcb,
-                            mcdc_params: condition_info,
-                        },
-                        span,
-                    }
-                },
-            )
-            .collect::<Vec<_>>();
-
-        // LLVM requires end index for counter mapping regions.
-        let kind = MappingKind::MCDCDecision(DecisionInfo {
-            bitmap_idx: (decision.bitmap_idx + decision.num_test_vectors) as u32,
-            num_conditions: u16::try_from(conditions.len()).unwrap(),
-        });
-        let span = decision.span;
-        mappings.extend(std::iter::once(Mapping { kind, span }).chain(conditions.into_iter()));
-    }
-
-    mappings
-}
-
 /// Inject any necessary coverage statements into MIR, so that they influence codegen.
 fn inject_coverage_statements<'tcx>(mir_body: &mut mir::Body<'tcx>, graph: &CoverageGraph) {
     for (bcb, data) in graph.iter_enumerated() {
@@ -210,51 +107,6 @@ fn inject_coverage_statements<'tcx>(mir_body: &mut mir::Body<'tcx>, graph: &Cove
     }
 }
 
-/// For each conditions inject statements to update condition bitmap after it has been evaluated.
-/// For each decision inject statements to update test vector bitmap after it has been evaluated.
-fn inject_mcdc_statements<'tcx>(
-    mir_body: &mut mir::Body<'tcx>,
-    graph: &CoverageGraph,
-    extracted_mappings: &ExtractedMappings,
-) {
-    for (decision, conditions) in &extracted_mappings.mcdc_mappings {
-        // Inject test vector update first because `inject_statement` always insert new statement at head.
-        for &end in &decision.end_bcbs {
-            let end_bb = graph[end].leader_bb();
-            inject_statement(
-                mir_body,
-                CoverageKind::TestVectorBitmapUpdate {
-                    bitmap_idx: decision.bitmap_idx as u32,
-                    decision_depth: decision.decision_depth,
-                },
-                end_bb,
-            );
-        }
-
-        for &mappings::MCDCBranch {
-            span: _,
-            true_bcb,
-            false_bcb,
-            condition_info: _,
-            true_index,
-            false_index,
-        } in conditions
-        {
-            for (index, bcb) in [(false_index, false_bcb), (true_index, true_bcb)] {
-                let bb = graph[bcb].leader_bb();
-                inject_statement(
-                    mir_body,
-                    CoverageKind::CondBitmapUpdate {
-                        index: index as u32,
-                        decision_depth: decision.decision_depth,
-                    },
-                    bb,
-                );
-            }
-        }
-    }
-}
-
 fn inject_statement(mir_body: &mut mir::Body<'_>, counter_kind: CoverageKind, bb: BasicBlock) {
     debug!("  injecting statement {counter_kind:?} for {bb:?}");
     let data = &mut mir_body[bb];
@@ -262,122 +114,3 @@ fn inject_statement(mir_body: &mut mir::Body<'_>, counter_kind: CoverageKind, bb
     let statement = Statement::new(source_info, StatementKind::Coverage(counter_kind));
     data.statements.insert(0, statement);
 }
-
-/// Function information extracted from HIR by the coverage instrumentor.
-#[derive(Debug)]
-struct ExtractedHirInfo {
-    function_source_hash: u64,
-    is_async_fn: bool,
-    /// The span of the function's signature, if available.
-    /// Must have the same context and filename as the body span.
-    fn_sig_span: Option<Span>,
-    body_span: Span,
-    /// "Holes" are regions within the function body (or its expansions) that
-    /// should not be included in coverage spans for this function
-    /// (e.g. closures and nested items).
-    hole_spans: Vec<Span>,
-}
-
-fn extract_hir_info<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId) -> ExtractedHirInfo {
-    // FIXME(#79625): Consider improving MIR to provide the information needed, to avoid going back
-    // to HIR for it.
-
-    // HACK: For synthetic MIR bodies (async closures), use the def id of the HIR body.
-    if tcx.is_synthetic_mir(def_id) {
-        return extract_hir_info(tcx, tcx.local_parent(def_id));
-    }
-
-    let hir_node = tcx.hir_node_by_def_id(def_id);
-    let fn_body_id = hir_node.body_id().expect("HIR node is a function with body");
-    let hir_body = tcx.hir_body(fn_body_id);
-
-    let maybe_fn_sig = hir_node.fn_sig();
-    let is_async_fn = maybe_fn_sig.is_some_and(|fn_sig| fn_sig.header.is_async());
-
-    let mut body_span = hir_body.value.span;
-
-    use hir::{Closure, Expr, ExprKind, Node};
-    // Unexpand a closure's body span back to the context of its declaration.
-    // This helps with closure bodies that consist of just a single bang-macro,
-    // and also with closure bodies produced by async desugaring.
-    if let Node::Expr(&Expr { kind: ExprKind::Closure(&Closure { fn_decl_span, .. }), .. }) =
-        hir_node
-    {
-        body_span = body_span.find_ancestor_in_same_ctxt(fn_decl_span).unwrap_or(body_span);
-    }
-
-    // The actual signature span is only used if it has the same context and
-    // filename as the body, and precedes the body.
-    let fn_sig_span = maybe_fn_sig.map(|fn_sig| fn_sig.span).filter(|&fn_sig_span| {
-        let source_map = tcx.sess.source_map();
-        let file_idx = |span: Span| source_map.lookup_source_file_idx(span.lo());
-
-        fn_sig_span.eq_ctxt(body_span)
-            && fn_sig_span.hi() <= body_span.lo()
-            && file_idx(fn_sig_span) == file_idx(body_span)
-    });
-
-    let function_source_hash = hash_mir_source(tcx, hir_body);
-
-    let hole_spans = extract_hole_spans_from_hir(tcx, hir_body);
-
-    ExtractedHirInfo { function_source_hash, is_async_fn, fn_sig_span, body_span, hole_spans }
-}
-
-fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &'tcx hir::Body<'tcx>) -> u64 {
-    // FIXME(cjgillot) Stop hashing HIR manually here.
-    let owner = hir_body.id().hir_id.owner;
-    tcx.hir_owner_nodes(owner).opt_hash_including_bodies.unwrap().to_smaller_hash().as_u64()
-}
-
-fn extract_hole_spans_from_hir<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &hir::Body<'tcx>) -> Vec<Span> {
-    struct HolesVisitor<'tcx> {
-        tcx: TyCtxt<'tcx>,
-        hole_spans: Vec<Span>,
-    }
-
-    impl<'tcx> Visitor<'tcx> for HolesVisitor<'tcx> {
-        /// We have special handling for nested items, but we still want to
-        /// traverse into nested bodies of things that are not considered items,
-        /// such as "anon consts" (e.g. array lengths).
-        type NestedFilter = nested_filter::OnlyBodies;
-
-        fn maybe_tcx(&mut self) -> TyCtxt<'tcx> {
-            self.tcx
-        }
-
-        /// We override `visit_nested_item` instead of `visit_item` because we
-        /// only need the item's span, not the item itself.
-        fn visit_nested_item(&mut self, id: hir::ItemId) -> Self::Result {
-            let span = self.tcx.def_span(id.owner_id.def_id);
-            self.visit_hole_span(span);
-            // Having visited this item, we don't care about its children,
-            // so don't call `walk_item`.
-        }
-
-        // We override `visit_expr` instead of the more specific expression
-        // visitors, so that we have direct access to the expression span.
-        fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) {
-            match expr.kind {
-                hir::ExprKind::Closure(_) | hir::ExprKind::ConstBlock(_) => {
-                    self.visit_hole_span(expr.span);
-                    // Having visited this expression, we don't care about its
-                    // children, so don't call `walk_expr`.
-                }
-
-                // For other expressions, recursively visit as normal.
-                _ => walk_expr(self, expr),
-            }
-        }
-    }
-    impl HolesVisitor<'_> {
-        fn visit_hole_span(&mut self, hole_span: Span) {
-            self.hole_spans.push(hole_span);
-        }
-    }
-
-    let mut visitor = HolesVisitor { tcx, hole_spans: vec![] };
-
-    visitor.visit_body(hir_body);
-    visitor.hole_spans
-}
diff --git a/compiler/rustc_mir_transform/src/coverage/query.rs b/compiler/rustc_mir_transform/src/coverage/query.rs
index ccf76dc7108..63c550c27fe 100644
--- a/compiler/rustc_mir_transform/src/coverage/query.rs
+++ b/compiler/rustc_mir_transform/src/coverage/query.rs
@@ -1,3 +1,5 @@
+use rustc_hir::attrs::{AttributeKind, CoverageAttrKind};
+use rustc_hir::find_attr;
 use rustc_index::bit_set::DenseBitSet;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::mir::coverage::{BasicCoverageBlock, CoverageIdsInfo, CoverageKind, MappingKind};
@@ -5,7 +7,6 @@ use rustc_middle::mir::{Body, Statement, StatementKind};
 use rustc_middle::ty::{self, TyCtxt};
 use rustc_middle::util::Providers;
 use rustc_span::def_id::LocalDefId;
-use rustc_span::sym;
 use tracing::trace;
 
 use crate::coverage::counters::node_flow::make_node_counters;
@@ -32,16 +33,6 @@ fn is_eligible_for_coverage(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
         return false;
     }
 
-    // Don't instrument functions with `#[automatically_derived]` on their
-    // enclosing impl block, on the assumption that most users won't care about
-    // coverage for derived impls.
-    if let Some(impl_of) = tcx.impl_of_method(def_id.to_def_id())
-        && tcx.is_automatically_derived(impl_of)
-    {
-        trace!("InstrumentCoverage skipped for {def_id:?} (automatically derived)");
-        return false;
-    }
-
     if tcx.codegen_fn_attrs(def_id).flags.contains(CodegenFnAttrFlags::NAKED) {
         trace!("InstrumentCoverage skipped for {def_id:?} (`#[naked]`)");
         return false;
@@ -57,22 +48,28 @@ fn is_eligible_for_coverage(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
 
 /// Query implementation for `coverage_attr_on`.
 fn coverage_attr_on(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
-    // Check for annotations directly on this def.
-    if let Some(attr) = tcx.get_attr(def_id, sym::coverage) {
-        match attr.meta_item_list().as_deref() {
-            Some([item]) if item.has_name(sym::off) => return false,
-            Some([item]) if item.has_name(sym::on) => return true,
-            Some(_) | None => {
-                // Other possibilities should have been rejected by `rustc_parse::validate_attr`.
-                // Use `span_delayed_bug` to avoid an ICE in failing builds (#127880).
-                tcx.dcx().span_delayed_bug(attr.span(), "unexpected value of coverage attribute");
-            }
+    // Check for a `#[coverage(..)]` attribute on this def.
+    if let Some(kind) =
+        find_attr!(tcx.get_all_attrs(def_id), AttributeKind::Coverage(_sp, kind) => kind)
+    {
+        match kind {
+            CoverageAttrKind::On => return true,
+            CoverageAttrKind::Off => return false,
         }
+    };
+
+    // Treat `#[automatically_derived]` as an implied `#[coverage(off)]`, on
+    // the assumption that most users won't want coverage for derived impls.
+    //
+    // This affects not just the associated items of an impl block, but also
+    // any closures and other nested functions within those associated items.
+    if tcx.is_automatically_derived(def_id.to_def_id()) {
+        return false;
     }
 
+    // Check the parent def (and so on recursively) until we find an
+    // enclosing attribute or reach the crate root.
     match tcx.opt_local_parent(def_id) {
-        // Check the parent def (and so on recursively) until we find an
-        // enclosing attribute or reach the crate root.
         Some(parent) => tcx.coverage_attr_on(parent),
         // We reached the crate root without seeing a coverage attribute, so
         // allow coverage instrumentation by default.
@@ -114,11 +111,6 @@ fn coverage_ids_info<'tcx>(
                 bcb_needs_counter.insert(true_bcb);
                 bcb_needs_counter.insert(false_bcb);
             }
-            MappingKind::MCDCBranch { true_bcb, false_bcb, mcdc_params: _ } => {
-                bcb_needs_counter.insert(true_bcb);
-                bcb_needs_counter.insert(false_bcb);
-            }
-            MappingKind::MCDCDecision(_) => {}
         }
     }
 
diff --git a/compiler/rustc_mir_transform/src/coverage/spans.rs b/compiler/rustc_mir_transform/src/coverage/spans.rs
index ec76076020e..325935ee846 100644
--- a/compiler/rustc_mir_transform/src/coverage/spans.rs
+++ b/compiler/rustc_mir_transform/src/coverage/spans.rs
@@ -1,12 +1,14 @@
-use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::mir;
+use rustc_middle::mir::coverage::{Mapping, MappingKind, START_BCB};
 use rustc_middle::ty::TyCtxt;
-use rustc_span::{DesugaringKind, ExpnKind, MacroKind, Span};
+use rustc_span::source_map::SourceMap;
+use rustc_span::{BytePos, DesugaringKind, ExpnId, ExpnKind, MacroKind, Span};
 use tracing::instrument;
 
+use crate::coverage::expansion::{self, ExpnTree, SpanWithBcb};
 use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};
-use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir, SpanFromMir};
-use crate::coverage::{ExtractedHirInfo, mappings, unexpand};
+use crate::coverage::hir_info::ExtractedHirInfo;
+use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir};
 
 mod from_mir;
 
@@ -15,24 +17,67 @@ pub(super) fn extract_refined_covspans<'tcx>(
     mir_body: &mir::Body<'tcx>,
     hir_info: &ExtractedHirInfo,
     graph: &CoverageGraph,
-    code_mappings: &mut impl Extend<mappings::CodeMapping>,
+    mappings: &mut Vec<Mapping>,
 ) {
+    if hir_info.is_async_fn {
+        // An async function desugars into a function that returns a future,
+        // with the user code wrapped in a closure. Any spans in the desugared
+        // outer function will be unhelpful, so just keep the signature span
+        // and ignore all of the spans in the MIR body.
+        if let Some(span) = hir_info.fn_sig_span {
+            mappings.push(Mapping { span, kind: MappingKind::Code { bcb: START_BCB } })
+        }
+        return;
+    }
+
     let &ExtractedHirInfo { body_span, .. } = hir_info;
 
     let raw_spans = from_mir::extract_raw_spans_from_mir(mir_body, graph);
-    let mut covspans = raw_spans
-        .into_iter()
-        .filter_map(|RawSpanFromMir { raw_span, bcb }| try {
-            let (span, expn_kind) =
-                unexpand::unexpand_into_body_span_with_expn_kind(raw_span, body_span)?;
-            // Discard any spans that fill the entire body, because they tend
-            // to represent compiler-inserted code, e.g. implicitly returning `()`.
-            if span.source_equal(body_span) {
-                return None;
-            };
-            SpanFromMir { span, expn_kind, bcb }
-        })
-        .collect::<Vec<_>>();
+    // Use the raw spans to build a tree of expansions for this function.
+    let expn_tree = expansion::build_expn_tree(
+        raw_spans
+            .into_iter()
+            .map(|RawSpanFromMir { raw_span, bcb }| SpanWithBcb { span: raw_span, bcb }),
+    );
+
+    let mut covspans = vec![];
+    let mut push_covspan = |covspan: Covspan| {
+        let covspan_span = covspan.span;
+        // Discard any spans not contained within the function body span.
+        // Also discard any spans that fill the entire body, because they tend
+        // to represent compiler-inserted code, e.g. implicitly returning `()`.
+        if !body_span.contains(covspan_span) || body_span.source_equal(covspan_span) {
+            return;
+        }
+
+        // Each pushed covspan should have the same context as the body span.
+        // If it somehow doesn't, discard the covspan, or panic in debug builds.
+        if !body_span.eq_ctxt(covspan_span) {
+            debug_assert!(
+                false,
+                "span context mismatch: body_span={body_span:?}, covspan.span={covspan_span:?}"
+            );
+            return;
+        }
+
+        covspans.push(covspan);
+    };
+
+    if let Some(node) = expn_tree.get(body_span.ctxt().outer_expn()) {
+        for &SpanWithBcb { span, bcb } in &node.spans {
+            push_covspan(Covspan { span, bcb });
+        }
+
+        // For each expansion with its call-site in the body span, try to
+        // distill a corresponding covspan.
+        for &child_expn_id in &node.child_expn_ids {
+            if let Some(covspan) =
+                single_covspan_for_child_expn(tcx, graph, &expn_tree, child_expn_id)
+            {
+                push_covspan(covspan);
+            }
+        }
+    }
 
     // Only proceed if we found at least one usable span.
     if covspans.is_empty() {
@@ -43,17 +88,10 @@ pub(super) fn extract_refined_covspans<'tcx>(
     // Otherwise, add a fake span at the start of the body, to avoid an ugly
     // gap between the start of the body and the first real span.
     // FIXME: Find a more principled way to solve this problem.
-    covspans.push(SpanFromMir::for_fn_sig(
-        hir_info.fn_sig_span.unwrap_or_else(|| body_span.shrink_to_lo()),
-    ));
-
-    // First, perform the passes that need macro information.
-    covspans.sort_by(|a, b| graph.cmp_in_dominator_order(a.bcb, b.bcb));
-    remove_unwanted_expansion_spans(&mut covspans);
-    shrink_visible_macro_spans(tcx, &mut covspans);
-
-    // We no longer need the extra information in `SpanFromMir`, so convert to `Covspan`.
-    let mut covspans = covspans.into_iter().map(SpanFromMir::into_covspan).collect::<Vec<_>>();
+    covspans.push(Covspan {
+        span: hir_info.fn_sig_span.unwrap_or_else(|| body_span.shrink_to_lo()),
+        bcb: START_BCB,
+    });
 
     let compare_covspans = |a: &Covspan, b: &Covspan| {
         compare_spans(a.span, b.span)
@@ -83,53 +121,57 @@ pub(super) fn extract_refined_covspans<'tcx>(
     // Discard any span that overlaps with a hole.
     discard_spans_overlapping_holes(&mut covspans, &holes);
 
-    // Perform more refinement steps after holes have been dealt with.
+    // Discard spans that overlap in unwanted ways.
     let mut covspans = remove_unwanted_overlapping_spans(covspans);
+
+    // For all empty spans, either enlarge them to be non-empty, or discard them.
+    let source_map = tcx.sess.source_map();
+    covspans.retain_mut(|covspan| {
+        let Some(span) = ensure_non_empty_span(source_map, covspan.span) else { return false };
+        covspan.span = span;
+        true
+    });
+
+    // Merge covspans that can be merged.
     covspans.dedup_by(|b, a| a.merge_if_eligible(b));
 
-    code_mappings.extend(covspans.into_iter().map(|Covspan { span, bcb }| {
+    mappings.extend(covspans.into_iter().map(|Covspan { span, bcb }| {
         // Each span produced by the refiner represents an ordinary code region.
-        mappings::CodeMapping { span, bcb }
+        Mapping { span, kind: MappingKind::Code { bcb } }
     }));
 }
 
-/// Macros that expand into branches (e.g. `assert!`, `trace!`) tend to generate
-/// multiple condition/consequent blocks that have the span of the whole macro
-/// invocation, which is unhelpful. Keeping only the first such span seems to
-/// give better mappings, so remove the others.
-///
-/// Similarly, `await` expands to a branch on the discriminant of `Poll`, which
-/// leads to incorrect coverage if the `Future` is immediately ready (#98712).
-///
-/// (The input spans should be sorted in BCB dominator order, so that the
-/// retained "first" span is likely to dominate the others.)
-fn remove_unwanted_expansion_spans(covspans: &mut Vec<SpanFromMir>) {
-    let mut deduplicated_spans = FxHashSet::default();
-
-    covspans.retain(|covspan| {
-        match covspan.expn_kind {
-            // Retain only the first await-related or macro-expanded covspan with this span.
-            Some(ExpnKind::Desugaring(DesugaringKind::Await)) => {
-                deduplicated_spans.insert(covspan.span)
-            }
-            Some(ExpnKind::Macro(MacroKind::Bang, _)) => deduplicated_spans.insert(covspan.span),
-            // Ignore (retain) other spans.
-            _ => true,
-        }
-    });
-}
+/// For a single child expansion, try to distill it into a single span+BCB mapping.
+fn single_covspan_for_child_expn(
+    tcx: TyCtxt<'_>,
+    graph: &CoverageGraph,
+    expn_tree: &ExpnTree,
+    expn_id: ExpnId,
+) -> Option<Covspan> {
+    let node = expn_tree.get(expn_id)?;
 
-/// When a span corresponds to a macro invocation that is visible from the
-/// function body, truncate it to just the macro name plus `!`.
-/// This seems to give better results for code that uses macros.
-fn shrink_visible_macro_spans(tcx: TyCtxt<'_>, covspans: &mut Vec<SpanFromMir>) {
-    let source_map = tcx.sess.source_map();
+    let bcbs =
+        expn_tree.iter_node_and_descendants(expn_id).flat_map(|n| n.spans.iter().map(|s| s.bcb));
 
-    for covspan in covspans {
-        if matches!(covspan.expn_kind, Some(ExpnKind::Macro(MacroKind::Bang, _))) {
-            covspan.span = source_map.span_through_char(covspan.span, '!');
+    let bcb = match node.expn_kind {
+        // For bang-macros (e.g. `assert!`, `trace!`) and for `await`, taking
+        // the "first" BCB in dominator order seems to give good results.
+        ExpnKind::Macro(MacroKind::Bang, _) | ExpnKind::Desugaring(DesugaringKind::Await) => {
+            bcbs.min_by(|&a, &b| graph.cmp_in_dominator_order(a, b))?
         }
+        // For other kinds of expansion, taking the "last" (most-dominated) BCB
+        // seems to give good results.
+        _ => bcbs.max_by(|&a, &b| graph.cmp_in_dominator_order(a, b))?,
+    };
+
+    // For bang-macro expansions, limit the call-site span to just the macro
+    // name plus `!`, excluding the macro arguments.
+    let mut span = node.call_site?;
+    if matches!(node.expn_kind, ExpnKind::Macro(MacroKind::Bang, _)) {
+        span = tcx.sess.source_map().span_through_char(span, '!');
     }
+
+    Some(Covspan { span, bcb })
 }
 
 /// Discard all covspans that overlap a hole.
@@ -230,3 +272,26 @@ fn compare_spans(a: Span, b: Span) -> std::cmp::Ordering {
         // - Both have the same start and span A extends further right
         .then_with(|| Ord::cmp(&a.hi(), &b.hi()).reverse())
 }
+
+fn ensure_non_empty_span(source_map: &SourceMap, span: Span) -> Option<Span> {
+    if !span.is_empty() {
+        return Some(span);
+    }
+
+    // The span is empty, so try to enlarge it to cover an adjacent '{' or '}'.
+    source_map
+        .span_to_source(span, |src, start, end| try {
+            // Adjusting span endpoints by `BytePos(1)` is normally a bug,
+            // but in this case we have specifically checked that the character
+            // we're skipping over is one of two specific ASCII characters, so
+            // adjusting by exactly 1 byte is correct.
+            if src.as_bytes().get(end).copied() == Some(b'{') {
+                Some(span.with_hi(span.hi() + BytePos(1)))
+            } else if start > 0 && src.as_bytes()[start - 1] == b'}' {
+                Some(span.with_lo(span.lo() - BytePos(1)))
+            } else {
+                None
+            }
+        })
+        .ok()?
+}
diff --git a/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs b/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs
index 804cd8ab3f7..dfeaa90dc2e 100644
--- a/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs
+++ b/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs
@@ -5,10 +5,9 @@ use rustc_middle::mir::coverage::CoverageKind;
 use rustc_middle::mir::{
     self, FakeReadCause, Statement, StatementKind, Terminator, TerminatorKind,
 };
-use rustc_span::{ExpnKind, Span};
+use rustc_span::Span;
 
-use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph, START_BCB};
-use crate::coverage::spans::Covspan;
+use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};
 
 #[derive(Debug)]
 pub(crate) struct RawSpanFromMir {
@@ -101,11 +100,7 @@ fn filtered_statement_span(statement: &Statement<'_>) -> Option<Span> {
         StatementKind::Coverage(CoverageKind::BlockMarker { .. }) => None,
 
         // These coverage statements should not exist prior to coverage instrumentation.
-        StatementKind::Coverage(
-            CoverageKind::VirtualCounter { .. }
-            | CoverageKind::CondBitmapUpdate { .. }
-            | CoverageKind::TestVectorBitmapUpdate { .. },
-        ) => bug!(
+        StatementKind::Coverage(CoverageKind::VirtualCounter { .. }) => bug!(
             "Unexpected coverage statement found during coverage instrumentation: {statement:?}"
         ),
     }
@@ -164,32 +159,3 @@ impl Hole {
         true
     }
 }
-
-#[derive(Debug)]
-pub(crate) struct SpanFromMir {
-    /// A span that has been extracted from MIR and then "un-expanded" back to
-    /// within the current function's `body_span`. After various intermediate
-    /// processing steps, this span is emitted as part of the final coverage
-    /// mappings.
-    ///
-    /// With the exception of `fn_sig_span`, this should always be contained
-    /// within `body_span`.
-    pub(crate) span: Span,
-    pub(crate) expn_kind: Option<ExpnKind>,
-    pub(crate) bcb: BasicCoverageBlock,
-}
-
-impl SpanFromMir {
-    pub(crate) fn for_fn_sig(fn_sig_span: Span) -> Self {
-        Self::new(fn_sig_span, None, START_BCB)
-    }
-
-    pub(crate) fn new(span: Span, expn_kind: Option<ExpnKind>, bcb: BasicCoverageBlock) -> Self {
-        Self { span, expn_kind, bcb }
-    }
-
-    pub(crate) fn into_covspan(self) -> Covspan {
-        let Self { span, expn_kind: _, bcb } = self;
-        Covspan { span, bcb }
-    }
-}
diff --git a/compiler/rustc_mir_transform/src/coverage/unexpand.rs b/compiler/rustc_mir_transform/src/coverage/unexpand.rs
index cb861544736..922edd3cc4f 100644
--- a/compiler/rustc_mir_transform/src/coverage/unexpand.rs
+++ b/compiler/rustc_mir_transform/src/coverage/unexpand.rs
@@ -1,4 +1,4 @@
-use rustc_span::{ExpnKind, Span};
+use rustc_span::Span;
 
 /// Walks through the expansion ancestors of `original_span` to find a span that
 /// is contained in `body_span` and has the same [syntax context] as `body_span`.
@@ -7,49 +7,3 @@ pub(crate) fn unexpand_into_body_span(original_span: Span, body_span: Span) -> O
     // we can just delegate directly to `find_ancestor_inside_same_ctxt`.
     original_span.find_ancestor_inside_same_ctxt(body_span)
 }
-
-/// Walks through the expansion ancestors of `original_span` to find a span that
-/// is contained in `body_span` and has the same [syntax context] as `body_span`.
-///
-/// If the returned span represents a bang-macro invocation (e.g. `foo!(..)`),
-/// the returned symbol will be the name of that macro (e.g. `foo`).
-pub(crate) fn unexpand_into_body_span_with_expn_kind(
-    original_span: Span,
-    body_span: Span,
-) -> Option<(Span, Option<ExpnKind>)> {
-    let (span, prev) = unexpand_into_body_span_with_prev(original_span, body_span)?;
-
-    let expn_kind = prev.map(|prev| prev.ctxt().outer_expn_data().kind);
-
-    Some((span, expn_kind))
-}
-
-/// Walks through the expansion ancestors of `original_span` to find a span that
-/// is contained in `body_span` and has the same [syntax context] as `body_span`.
-/// The ancestor that was traversed just before the matching span (if any) is
-/// also returned.
-///
-/// For example, a return value of `Some((ancestor, Some(prev)))` means that:
-/// - `ancestor == original_span.find_ancestor_inside_same_ctxt(body_span)`
-/// - `prev.parent_callsite() == ancestor`
-///
-/// [syntax context]: rustc_span::SyntaxContext
-fn unexpand_into_body_span_with_prev(
-    original_span: Span,
-    body_span: Span,
-) -> Option<(Span, Option<Span>)> {
-    let mut prev = None;
-    let mut curr = original_span;
-
-    while !body_span.contains(curr) || !curr.eq_ctxt(body_span) {
-        prev = Some(curr);
-        curr = curr.parent_callsite()?;
-    }
-
-    debug_assert_eq!(Some(curr), original_span.find_ancestor_inside_same_ctxt(body_span));
-    if let Some(prev) = prev {
-        debug_assert_eq!(Some(curr), prev.parent_callsite());
-    }
-
-    Some((curr, prev))
-}
diff --git a/compiler/rustc_mir_transform/src/cross_crate_inline.rs b/compiler/rustc_mir_transform/src/cross_crate_inline.rs
index 6d7b7e10ef6..df98c07f549 100644
--- a/compiler/rustc_mir_transform/src/cross_crate_inline.rs
+++ b/compiler/rustc_mir_transform/src/cross_crate_inline.rs
@@ -1,4 +1,4 @@
-use rustc_attr_data_structures::InlineAttr;
+use rustc_hir::attrs::InlineAttr;
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::LocalDefId;
 use rustc_middle::mir::visit::Visitor;
@@ -135,7 +135,16 @@ impl<'tcx> Visitor<'tcx> for CostChecker<'_, 'tcx> {
                     }
                 }
             }
-            TerminatorKind::Call { unwind, .. } => {
+            TerminatorKind::Call { ref func, unwind, .. } => {
+                // We track calls because they make our function not a leaf (and in theory, the
+                // number of calls indicates how likely this function is to perturb other CGUs).
+                // But intrinsics don't have a body that gets assigned to a CGU, so they are
+                // ignored.
+                if let Some((fn_def_id, _)) = func.const_fn_def()
+                    && self.tcx.has_attr(fn_def_id, sym::rustc_intrinsic)
+                {
+                    return;
+                }
                 self.calls += 1;
                 if let UnwindAction::Cleanup(_) = unwind {
                     self.landing_pads += 1;
diff --git a/compiler/rustc_mir_transform/src/ctfe_limit.rs b/compiler/rustc_mir_transform/src/ctfe_limit.rs
index fb17cca30f4..ac46336b834 100644
--- a/compiler/rustc_mir_transform/src/ctfe_limit.rs
+++ b/compiler/rustc_mir_transform/src/ctfe_limit.rs
@@ -18,7 +18,7 @@ impl<'tcx> crate::MirPass<'tcx> for CtfeLimit {
             .basic_blocks
             .iter_enumerated()
             .filter_map(|(node, node_data)| {
-                if matches!(node_data.terminator().kind, TerminatorKind::Call { .. })
+                if matches!(node_data.terminator().kind, TerminatorKind::Call { .. } | TerminatorKind::TailCall { .. })
                     // Back edges in a CFG indicate loops
                     || has_back_edge(doms, node, node_data)
                 {
diff --git a/compiler/rustc_mir_transform/src/dataflow_const_prop.rs b/compiler/rustc_mir_transform/src/dataflow_const_prop.rs
index fe53de31f75..5c984984d3c 100644
--- a/compiler/rustc_mir_transform/src/dataflow_const_prop.rs
+++ b/compiler/rustc_mir_transform/src/dataflow_const_prop.rs
@@ -412,18 +412,6 @@ impl<'a, 'tcx> ConstAnalysis<'a, 'tcx> {
         state: &mut State<FlatSet<Scalar>>,
     ) -> ValueOrPlace<FlatSet<Scalar>> {
         let val = match rvalue {
-            Rvalue::Len(place) => {
-                let place_ty = place.ty(self.local_decls, self.tcx);
-                if let ty::Array(_, len) = place_ty.ty.kind() {
-                    Const::Ty(self.tcx.types.usize, *len)
-                        .try_eval_scalar(self.tcx, self.typing_env)
-                        .map_or(FlatSet::Top, FlatSet::Elem)
-                } else if let [ProjectionElem::Deref] = place.projection[..] {
-                    state.get_len(place.local.into(), &self.map)
-                } else {
-                    FlatSet::Top
-                }
-            }
             Rvalue::Cast(CastKind::IntToInt | CastKind::IntToFloat, operand, ty) => {
                 let Ok(layout) = self.tcx.layout_of(self.typing_env.as_query_input(*ty)) else {
                     return ValueOrPlace::Value(FlatSet::Top);
@@ -465,15 +453,23 @@ impl<'a, 'tcx> ConstAnalysis<'a, 'tcx> {
                 let (val, _overflow) = self.binary_op(state, *op, left, right);
                 val
             }
-            Rvalue::UnaryOp(op, operand) => match self.eval_operand(operand, state) {
-                FlatSet::Elem(value) => self
-                    .ecx
-                    .unary_op(*op, &value)
-                    .discard_err()
-                    .map_or(FlatSet::Top, |val| self.wrap_immediate(*val)),
-                FlatSet::Bottom => FlatSet::Bottom,
-                FlatSet::Top => FlatSet::Top,
-            },
+            Rvalue::UnaryOp(op, operand) => {
+                if let UnOp::PtrMetadata = op
+                    && let Some(place) = operand.place()
+                    && let Some(len) = self.map.find_len(place.as_ref())
+                {
+                    return ValueOrPlace::Place(len);
+                }
+                match self.eval_operand(operand, state) {
+                    FlatSet::Elem(value) => self
+                        .ecx
+                        .unary_op(*op, &value)
+                        .discard_err()
+                        .map_or(FlatSet::Top, |val| self.wrap_immediate(*val)),
+                    FlatSet::Bottom => FlatSet::Bottom,
+                    FlatSet::Top => FlatSet::Top,
+                }
+            }
             Rvalue::NullaryOp(null_op, ty) => {
                 let Ok(layout) = self.tcx.layout_of(self.typing_env.as_query_input(*ty)) else {
                     return ValueOrPlace::Value(FlatSet::Top);
diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs
index 4c94a6c524e..74c22ff10c1 100644
--- a/compiler/rustc_mir_transform/src/dest_prop.rs
+++ b/compiler/rustc_mir_transform/src/dest_prop.rs
@@ -59,6 +59,12 @@
 //! The first two conditions are simple structural requirements on the `Assign` statements that can
 //! be trivially checked. The third requirement however is more difficult and costly to check.
 //!
+//! ## Current implementation
+//!
+//! The current implementation relies on live range computation to check for conflicts. We only
+//! allow to merge locals that have disjoint live ranges. The live range are defined with
+//! half-statement granularity, so as to make all writes be live for at least a half statement.
+//!
 //! ## Future Improvements
 //!
 //! There are a number of ways in which this pass could be improved in the future:
@@ -117,9 +123,8 @@
 //! - Layout optimizations for coroutines have been added to improve code generation for
 //!   async/await, which are very similar in spirit to what this optimization does.
 //!
-//! Also, rustc now has a simple NRVO pass (see `nrvo.rs`), which handles a subset of the cases that
-//! this destination propagation pass handles, proving that similar optimizations can be performed
-//! on MIR.
+//! [The next approach][attempt 4] computes a conflict matrix between locals by forbidding merging
+//! locals with competing writes or with one write while the other is live.
 //!
 //! ## Pre/Post Optimization
 //!
@@ -130,115 +135,102 @@
 //! [attempt 1]: https://github.com/rust-lang/rust/pull/47954
 //! [attempt 2]: https://github.com/rust-lang/rust/pull/71003
 //! [attempt 3]: https://github.com/rust-lang/rust/pull/72632
+//! [attempt 4]: https://github.com/rust-lang/rust/pull/96451
 
-use rustc_data_structures::fx::{FxIndexMap, IndexEntry, IndexOccupiedEntry};
+use rustc_data_structures::union_find::UnionFind;
 use rustc_index::bit_set::DenseBitSet;
 use rustc_index::interval::SparseIntervalMatrix;
-use rustc_middle::bug;
-use rustc_middle::mir::visit::{MutVisitor, PlaceContext, Visitor};
-use rustc_middle::mir::{
-    Body, HasLocalDecls, InlineAsmOperand, Local, LocalKind, Location, Operand, PassWhere, Place,
-    Rvalue, Statement, StatementKind, TerminatorKind, dump_mir, traversal,
-};
+use rustc_index::{IndexVec, newtype_index};
+use rustc_middle::mir::visit::{MutVisitor, PlaceContext, VisitPlacesWith, Visitor};
+use rustc_middle::mir::*;
 use rustc_middle::ty::TyCtxt;
-use rustc_mir_dataflow::Analysis;
-use rustc_mir_dataflow::impls::MaybeLiveLocals;
-use rustc_mir_dataflow::points::{DenseLocationMap, PointIndex, save_as_intervals};
+use rustc_mir_dataflow::impls::{DefUse, MaybeLiveLocals};
+use rustc_mir_dataflow::points::DenseLocationMap;
+use rustc_mir_dataflow::{Analysis, Results};
 use tracing::{debug, trace};
 
 pub(super) struct DestinationPropagation;
 
 impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation {
     fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
-        // For now, only run at MIR opt level 3. Two things need to be changed before this can be
-        // turned on by default:
-        //  1. Because of the overeager removal of storage statements, this can cause stack space
-        //     regressions. This opt is not the place to fix this though, it's a more general
-        //     problem in MIR.
-        //  2. Despite being an overall perf improvement, this still causes a 30% regression in
-        //     keccak. We can temporarily fix this by bounding function size, but in the long term
-        //     we should fix this by being smarter about invalidating analysis results.
-        sess.mir_opt_level() >= 3
+        sess.mir_opt_level() >= 2
     }
 
+    #[tracing::instrument(level = "trace", skip(self, tcx, body))]
     fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
         let def_id = body.source.def_id();
-        let mut candidates = Candidates::default();
-        let mut write_info = WriteInfo::default();
-        trace!(func = ?tcx.def_path_str(def_id));
+        trace!(?def_id);
 
         let borrowed = rustc_mir_dataflow::impls::borrowed_locals(body);
 
+        let candidates = Candidates::find(body, &borrowed);
+        trace!(?candidates);
+        if candidates.c.is_empty() {
+            return;
+        }
+
         let live = MaybeLiveLocals.iterate_to_fixpoint(tcx, body, Some("MaybeLiveLocals-DestProp"));
+
         let points = DenseLocationMap::new(body);
-        let mut live = save_as_intervals(&points, body, live.analysis, live.results);
-
-        // In order to avoid having to collect data for every single pair of locals in the body, we
-        // do not allow doing more than one merge for places that are derived from the same local at
-        // once. To avoid missed opportunities, we instead iterate to a fixed point - we'll refer to
-        // each of these iterations as a "round."
-        //
-        // Reaching a fixed point could in theory take up to `min(l, s)` rounds - however, we do not
-        // expect to see MIR like that. To verify this, a test was run against `[rust-lang/regex]` -
-        // the average MIR body saw 1.32 full iterations of this loop. The most that was hit were 30
-        // for a single function. Only 80/2801 (2.9%) of functions saw at least 5.
-        //
-        // [rust-lang/regex]:
-        //     https://github.com/rust-lang/regex/tree/b5372864e2df6a2f5e543a556a62197f50ca3650
-        let mut round_count = 0;
-        loop {
-            // PERF: Can we do something smarter than recalculating the candidates and liveness
-            // results?
-            candidates.reset_and_find(body, &borrowed);
-            trace!(?candidates);
-            dest_prop_mir_dump(tcx, body, &points, &live, round_count);
-
-            FilterInformation::filter_liveness(
-                &mut candidates,
-                &points,
-                &live,
-                &mut write_info,
-                body,
-            );
-
-            // Because we only filter once per round, it is unsound to use a local for more than
-            // one merge operation within a single round of optimizations. We store here which ones
-            // we have already used.
-            let mut merged_locals: DenseBitSet<Local> =
-                DenseBitSet::new_empty(body.local_decls.len());
-
-            // This is the set of merges we will apply this round. It is a subset of the candidates.
-            let mut merges = FxIndexMap::default();
-
-            for (src, candidates) in candidates.c.iter() {
-                if merged_locals.contains(*src) {
-                    continue;
-                }
-                let Some(dest) = candidates.iter().find(|dest| !merged_locals.contains(**dest))
-                else {
-                    continue;
-                };
-
-                // Replace `src` by `dest` everywhere.
-                merges.insert(*src, *dest);
-                merged_locals.insert(*src);
-                merged_locals.insert(*dest);
-
-                // Update liveness information based on the merge we just performed.
-                // Every location where `src` was live, `dest` will be live.
-                live.union_rows(*src, *dest);
+        let mut relevant = RelevantLocals::compute(&candidates, body.local_decls.len());
+        let mut live = save_as_intervals(&points, body, &relevant, live.results);
+
+        dest_prop_mir_dump(tcx, body, &points, &live, &relevant);
+
+        let mut merged_locals = DenseBitSet::new_empty(body.local_decls.len());
+
+        for (src, dst) in candidates.c.into_iter() {
+            trace!(?src, ?dst);
+
+            let Some(mut src) = relevant.find(src) else { continue };
+            let Some(mut dst) = relevant.find(dst) else { continue };
+            if src == dst {
+                continue;
             }
-            trace!(merging = ?merges);
 
-            if merges.is_empty() {
-                break;
+            let Some(src_live_ranges) = live.row(src) else { continue };
+            let Some(dst_live_ranges) = live.row(dst) else { continue };
+            trace!(?src, ?src_live_ranges);
+            trace!(?dst, ?dst_live_ranges);
+
+            if src_live_ranges.disjoint(dst_live_ranges) {
+                // We want to replace `src` by `dst`.
+                let mut orig_src = relevant.original[src];
+                let mut orig_dst = relevant.original[dst];
+
+                // The return place and function arguments are required and cannot be renamed.
+                // This check cannot be made during candidate collection, as we may want to
+                // unify the same non-required local with several required locals.
+                match (is_local_required(orig_src, body), is_local_required(orig_dst, body)) {
+                    // Renaming `src` is ok.
+                    (false, _) => {}
+                    // Renaming `src` is wrong, but renaming `dst` is ok.
+                    (true, false) => {
+                        std::mem::swap(&mut src, &mut dst);
+                        std::mem::swap(&mut orig_src, &mut orig_dst);
+                    }
+                    // Neither local can be renamed, so skip this case.
+                    (true, true) => continue,
+                }
+
+                trace!(?src, ?dst, "merge");
+                merged_locals.insert(orig_src);
+                merged_locals.insert(orig_dst);
+
+                // Replace `src` by `dst`.
+                let head = relevant.union(src, dst);
+                live.union_rows(/* read */ src, /* write */ head);
+                live.union_rows(/* read */ dst, /* write */ head);
             }
-            round_count += 1;
+        }
+        trace!(?merged_locals);
+        trace!(?relevant.renames);
 
-            apply_merges(body, tcx, merges, merged_locals);
+        if merged_locals.is_empty() {
+            return;
         }
 
-        trace!(round_count);
+        apply_merges(body, tcx, relevant, merged_locals);
     }
 
     fn is_required(&self) -> bool {
@@ -246,30 +238,6 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation {
     }
 }
 
-#[derive(Debug, Default)]
-struct Candidates {
-    /// The set of candidates we are considering in this optimization.
-    ///
-    /// We will always merge the key into at most one of its values.
-    ///
-    /// Whether a place ends up in the key or the value does not correspond to whether it appears as
-    /// the lhs or rhs of any assignment. As a matter of fact, the places in here might never appear
-    /// in an assignment at all. This happens because if we see an assignment like this:
-    ///
-    /// ```ignore (syntax-highlighting-only)
-    /// _1.0 = _2.0
-    /// ```
-    ///
-    /// We will still report that we would like to merge `_1` and `_2` in an attempt to allow us to
-    /// remove that assignment.
-    c: FxIndexMap<Local, Vec<Local>>,
-
-    /// A reverse index of the `c` set; if the `c` set contains `a => Place { local: b, proj }`,
-    /// then this contains `b => a`.
-    // PERF: Possibly these should be `SmallVec`s?
-    reverse: FxIndexMap<Local, Vec<Local>>,
-}
-
 //////////////////////////////////////////////////////////
 // Merging
 //
@@ -278,16 +246,16 @@ struct Candidates {
 fn apply_merges<'tcx>(
     body: &mut Body<'tcx>,
     tcx: TyCtxt<'tcx>,
-    merges: FxIndexMap<Local, Local>,
+    relevant: RelevantLocals,
     merged_locals: DenseBitSet<Local>,
 ) {
-    let mut merger = Merger { tcx, merges, merged_locals };
+    let mut merger = Merger { tcx, relevant, merged_locals };
     merger.visit_body_preserves_cfg(body);
 }
 
 struct Merger<'tcx> {
     tcx: TyCtxt<'tcx>,
-    merges: FxIndexMap<Local, Local>,
+    relevant: RelevantLocals,
     merged_locals: DenseBitSet<Local>,
 }
 
@@ -297,8 +265,8 @@ impl<'tcx> MutVisitor<'tcx> for Merger<'tcx> {
     }
 
     fn visit_local(&mut self, local: &mut Local, _: PlaceContext, _location: Location) {
-        if let Some(dest) = self.merges.get(local) {
-            *local = *dest;
+        if let Some(relevant) = self.relevant.find(*local) {
+            *local = self.relevant.original[relevant];
         }
     }
 
@@ -336,414 +304,95 @@ impl<'tcx> MutVisitor<'tcx> for Merger<'tcx> {
 }
 
 //////////////////////////////////////////////////////////
-// Liveness filtering
+// Relevant locals
 //
-// This section enforces bullet point 2
+// Small utility to reduce size of the conflict matrix by only considering locals that appear in
+// the candidates
 
-struct FilterInformation<'a, 'tcx> {
-    body: &'a Body<'tcx>,
-    points: &'a DenseLocationMap,
-    live: &'a SparseIntervalMatrix<Local, PointIndex>,
-    candidates: &'a mut Candidates,
-    write_info: &'a mut WriteInfo,
-    at: Location,
+newtype_index! {
+    /// Represent a subset of locals which appear in candidates.
+    struct RelevantLocal {}
 }
 
-// We first implement some utility functions which we will expose removing candidates according to
-// different needs. Throughout the liveness filtering, the `candidates` are only ever accessed
-// through these methods, and not directly.
-impl Candidates {
-    /// Collects the candidates for merging.
-    ///
-    /// This is responsible for enforcing the first and third bullet point.
-    fn reset_and_find<'tcx>(&mut self, body: &Body<'tcx>, borrowed: &DenseBitSet<Local>) {
-        self.c.clear();
-        self.reverse.clear();
-        let mut visitor = FindAssignments { body, candidates: &mut self.c, borrowed };
-        visitor.visit_body(body);
-        // Deduplicate candidates.
-        for (_, cands) in self.c.iter_mut() {
-            cands.sort();
-            cands.dedup();
-        }
-        // Generate the reverse map.
-        for (src, cands) in self.c.iter() {
-            for dest in cands.iter().copied() {
-                self.reverse.entry(dest).or_default().push(*src);
-            }
-        }
-    }
-
-    /// Just `Vec::retain`, but the condition is inverted and we add debugging output
-    fn vec_filter_candidates(
-        src: Local,
-        v: &mut Vec<Local>,
-        mut f: impl FnMut(Local) -> CandidateFilter,
-        at: Location,
-    ) {
-        v.retain(|dest| {
-            let remove = f(*dest);
-            if remove == CandidateFilter::Remove {
-                trace!("eliminating {:?} => {:?} due to conflict at {:?}", src, dest, at);
-            }
-            remove == CandidateFilter::Keep
-        });
-    }
-
-    /// `vec_filter_candidates` but for an `Entry`
-    fn entry_filter_candidates(
-        mut entry: IndexOccupiedEntry<'_, Local, Vec<Local>>,
-        p: Local,
-        f: impl FnMut(Local) -> CandidateFilter,
-        at: Location,
-    ) {
-        let candidates = entry.get_mut();
-        Self::vec_filter_candidates(p, candidates, f, at);
-        if candidates.len() == 0 {
-            // FIXME(#120456) - is `swap_remove` correct?
-            entry.swap_remove();
-        }
-    }
-
-    /// For all candidates `(p, q)` or `(q, p)` removes the candidate if `f(q)` says to do so
-    fn filter_candidates_by(
-        &mut self,
-        p: Local,
-        mut f: impl FnMut(Local) -> CandidateFilter,
-        at: Location,
-    ) {
-        // Cover the cases where `p` appears as a `src`
-        if let IndexEntry::Occupied(entry) = self.c.entry(p) {
-            Self::entry_filter_candidates(entry, p, &mut f, at);
-        }
-        // And the cases where `p` appears as a `dest`
-        let Some(srcs) = self.reverse.get_mut(&p) else {
-            return;
-        };
-        // We use `retain` here to remove the elements from the reverse set if we've removed the
-        // matching candidate in the forward set.
-        srcs.retain(|src| {
-            if f(*src) == CandidateFilter::Keep {
-                return true;
-            }
-            let IndexEntry::Occupied(entry) = self.c.entry(*src) else {
-                return false;
-            };
-            Self::entry_filter_candidates(
-                entry,
-                *src,
-                |dest| {
-                    if dest == p { CandidateFilter::Remove } else { CandidateFilter::Keep }
-                },
-                at,
-            );
-            false
-        });
-    }
+#[derive(Debug)]
+struct RelevantLocals {
+    original: IndexVec<RelevantLocal, Local>,
+    shrink: IndexVec<Local, Option<RelevantLocal>>,
+    renames: UnionFind<RelevantLocal>,
 }
 
-#[derive(Copy, Clone, PartialEq, Eq)]
-enum CandidateFilter {
-    Keep,
-    Remove,
-}
+impl RelevantLocals {
+    #[tracing::instrument(level = "trace", skip(candidates, num_locals), ret)]
+    fn compute(candidates: &Candidates, num_locals: usize) -> RelevantLocals {
+        let mut original = IndexVec::with_capacity(candidates.c.len());
+        let mut shrink = IndexVec::from_elem_n(None, num_locals);
 
-impl<'a, 'tcx> FilterInformation<'a, 'tcx> {
-    /// Filters the set of candidates to remove those that conflict.
-    ///
-    /// The steps we take are exactly those that are outlined at the top of the file. For each
-    /// statement/terminator, we collect the set of locals that are written to in that
-    /// statement/terminator, and then we remove all pairs of candidates that contain one such local
-    /// and another one that is live.
-    ///
-    /// We need to be careful about the ordering of operations within each statement/terminator
-    /// here. Many statements might write and read from more than one place, and we need to consider
-    /// them all. The strategy for doing this is as follows: We first gather all the places that are
-    /// written to within the statement/terminator via `WriteInfo`. Then, we use the liveness
-    /// analysis from *before* the statement/terminator (in the control flow sense) to eliminate
-    /// candidates - this is because we want to conservatively treat a pair of locals that is both
-    /// read and written in the statement/terminator to be conflicting, and the liveness analysis
-    /// before the statement/terminator will correctly report locals that are read in the
-    /// statement/terminator to be live. We are additionally conservative by treating all written to
-    /// locals as also being read from.
-    fn filter_liveness(
-        candidates: &mut Candidates,
-        points: &DenseLocationMap,
-        live: &SparseIntervalMatrix<Local, PointIndex>,
-        write_info: &mut WriteInfo,
-        body: &Body<'tcx>,
-    ) {
-        let mut this = FilterInformation {
-            body,
-            points,
-            live,
-            candidates,
-            // We don't actually store anything at this scope, we just keep things here to be able
-            // to reuse the allocation.
-            write_info,
-            // Doesn't matter what we put here, will be overwritten before being used
-            at: Location::START,
+        // Mark a local as relevant and record it into the maps.
+        let mut declare = |local| {
+            shrink.get_or_insert_with(local, || original.push(local));
         };
-        this.internal_filter_liveness();
-    }
-
-    fn internal_filter_liveness(&mut self) {
-        for (block, data) in traversal::preorder(self.body) {
-            self.at = Location { block, statement_index: data.statements.len() };
-            self.write_info.for_terminator(&data.terminator().kind);
-            self.apply_conflicts();
-
-            for (i, statement) in data.statements.iter().enumerate().rev() {
-                self.at = Location { block, statement_index: i };
-                self.write_info.for_statement(&statement.kind, self.body);
-                self.apply_conflicts();
-            }
-        }
-    }
-
-    fn apply_conflicts(&mut self) {
-        let writes = &self.write_info.writes;
-        for p in writes {
-            let other_skip = self.write_info.skip_pair.and_then(|(a, b)| {
-                if a == *p {
-                    Some(b)
-                } else if b == *p {
-                    Some(a)
-                } else {
-                    None
-                }
-            });
-            let at = self.points.point_from_location(self.at);
-            self.candidates.filter_candidates_by(
-                *p,
-                |q| {
-                    if Some(q) == other_skip {
-                        return CandidateFilter::Keep;
-                    }
-                    // It is possible that a local may be live for less than the
-                    // duration of a statement This happens in the case of function
-                    // calls or inline asm. Because of this, we also mark locals as
-                    // conflicting when both of them are written to in the same
-                    // statement.
-                    if self.live.contains(q, at) || writes.contains(&q) {
-                        CandidateFilter::Remove
-                    } else {
-                        CandidateFilter::Keep
-                    }
-                },
-                self.at,
-            );
-        }
-    }
-}
-
-/// Describes where a statement/terminator writes to
-#[derive(Default, Debug)]
-struct WriteInfo {
-    writes: Vec<Local>,
-    /// If this pair of locals is a candidate pair, completely skip processing it during this
-    /// statement. All other candidates are unaffected.
-    skip_pair: Option<(Local, Local)>,
-}
-
-impl WriteInfo {
-    fn for_statement<'tcx>(&mut self, statement: &StatementKind<'tcx>, body: &Body<'tcx>) {
-        self.reset();
-        match statement {
-            StatementKind::Assign(box (lhs, rhs)) => {
-                self.add_place(*lhs);
-                match rhs {
-                    Rvalue::Use(op) => {
-                        self.add_operand(op);
-                        self.consider_skipping_for_assign_use(*lhs, op, body);
-                    }
-                    Rvalue::Repeat(op, _) => {
-                        self.add_operand(op);
-                    }
-                    Rvalue::Cast(_, op, _)
-                    | Rvalue::UnaryOp(_, op)
-                    | Rvalue::ShallowInitBox(op, _) => {
-                        self.add_operand(op);
-                    }
-                    Rvalue::BinaryOp(_, ops) => {
-                        for op in [&ops.0, &ops.1] {
-                            self.add_operand(op);
-                        }
-                    }
-                    Rvalue::Aggregate(_, ops) => {
-                        for op in ops {
-                            self.add_operand(op);
-                        }
-                    }
-                    Rvalue::WrapUnsafeBinder(op, _) => {
-                        self.add_operand(op);
-                    }
-                    Rvalue::ThreadLocalRef(_)
-                    | Rvalue::NullaryOp(_, _)
-                    | Rvalue::Ref(_, _, _)
-                    | Rvalue::RawPtr(_, _)
-                    | Rvalue::Len(_)
-                    | Rvalue::Discriminant(_)
-                    | Rvalue::CopyForDeref(_) => {}
-                }
-            }
-            // Retags are technically also reads, but reporting them as a write suffices
-            StatementKind::SetDiscriminant { place, .. }
-            | StatementKind::Deinit(place)
-            | StatementKind::Retag(_, place) => {
-                self.add_place(**place);
-            }
-            StatementKind::Intrinsic(_)
-            | StatementKind::ConstEvalCounter
-            | StatementKind::Nop
-            | StatementKind::Coverage(_)
-            | StatementKind::StorageLive(_)
-            | StatementKind::StorageDead(_)
-            | StatementKind::BackwardIncompatibleDropHint { .. }
-            | StatementKind::PlaceMention(_) => {}
-            StatementKind::FakeRead(_) | StatementKind::AscribeUserType(_, _) => {
-                bug!("{:?} not found in this MIR phase", statement)
-            }
-        }
-    }
 
-    fn consider_skipping_for_assign_use<'tcx>(
-        &mut self,
-        lhs: Place<'tcx>,
-        rhs: &Operand<'tcx>,
-        body: &Body<'tcx>,
-    ) {
-        let Some(rhs) = rhs.place() else { return };
-        if let Some(pair) = places_to_candidate_pair(lhs, rhs, body) {
-            self.skip_pair = Some(pair);
+        for &(src, dest) in candidates.c.iter() {
+            declare(src);
+            declare(dest)
         }
-    }
 
-    fn for_terminator<'tcx>(&mut self, terminator: &TerminatorKind<'tcx>) {
-        self.reset();
-        match terminator {
-            TerminatorKind::SwitchInt { discr: op, .. }
-            | TerminatorKind::Assert { cond: op, .. } => {
-                self.add_operand(op);
-            }
-            TerminatorKind::Call { destination, func, args, .. } => {
-                self.add_place(*destination);
-                self.add_operand(func);
-                for arg in args {
-                    self.add_operand(&arg.node);
-                }
-            }
-            TerminatorKind::TailCall { func, args, .. } => {
-                self.add_operand(func);
-                for arg in args {
-                    self.add_operand(&arg.node);
-                }
-            }
-            TerminatorKind::InlineAsm { operands, .. } => {
-                for asm_operand in operands {
-                    match asm_operand {
-                        InlineAsmOperand::In { value, .. } => {
-                            self.add_operand(value);
-                        }
-                        InlineAsmOperand::Out { place, .. } => {
-                            if let Some(place) = place {
-                                self.add_place(*place);
-                            }
-                        }
-                        // Note that the `late` field in `InOut` is about whether the registers used
-                        // for these things overlap, and is of absolutely no interest to us.
-                        InlineAsmOperand::InOut { in_value, out_place, .. } => {
-                            if let Some(place) = out_place {
-                                self.add_place(*place);
-                            }
-                            self.add_operand(in_value);
-                        }
-                        InlineAsmOperand::Const { .. }
-                        | InlineAsmOperand::SymFn { .. }
-                        | InlineAsmOperand::SymStatic { .. }
-                        | InlineAsmOperand::Label { .. } => {}
-                    }
-                }
-            }
-            TerminatorKind::Goto { .. }
-            | TerminatorKind::UnwindResume
-            | TerminatorKind::UnwindTerminate(_)
-            | TerminatorKind::Return
-            | TerminatorKind::Unreachable { .. } => (),
-            TerminatorKind::Drop { .. } => {
-                // `Drop`s create a `&mut` and so are not considered
-            }
-            TerminatorKind::Yield { .. }
-            | TerminatorKind::CoroutineDrop
-            | TerminatorKind::FalseEdge { .. }
-            | TerminatorKind::FalseUnwind { .. } => {
-                bug!("{:?} not found in this MIR phase", terminator)
-            }
-        }
+        let renames = UnionFind::new(original.len());
+        RelevantLocals { original, shrink, renames }
     }
 
-    fn add_place(&mut self, place: Place<'_>) {
-        self.writes.push(place.local);
-    }
-
-    fn add_operand<'tcx>(&mut self, op: &Operand<'tcx>) {
-        match op {
-            // FIXME(JakobDegen): In a previous version, the `Move` case was incorrectly treated as
-            // being a read only. This was unsound, however we cannot add a regression test because
-            // it is not possible to set this off with current MIR. Once we have that ability, a
-            // regression test should be added.
-            Operand::Move(p) => self.add_place(*p),
-            Operand::Copy(_) | Operand::Constant(_) => (),
-        }
+    fn find(&mut self, src: Local) -> Option<RelevantLocal> {
+        let src = self.shrink[src]?;
+        let src = self.renames.find(src);
+        Some(src)
     }
 
-    fn reset(&mut self) {
-        self.writes.clear();
-        self.skip_pair = None;
+    fn union(&mut self, lhs: RelevantLocal, rhs: RelevantLocal) -> RelevantLocal {
+        let head = self.renames.unify(lhs, rhs);
+        // We need to ensure we keep the original local of the RHS, as it may be a required local.
+        self.original[head] = self.original[rhs];
+        head
     }
 }
 
 /////////////////////////////////////////////////////
 // Candidate accumulation
 
-/// If the pair of places is being considered for merging, returns the candidate which would be
-/// merged in order to accomplish this.
-///
-/// The contract here is in one direction - there is a guarantee that merging the locals that are
-/// outputted by this function would result in an assignment between the inputs becoming a
-/// self-assignment. However, there is no guarantee that the returned pair is actually suitable for
-/// merging - candidate collection must still check this independently.
-///
-/// This output is unique for each unordered pair of input places.
-fn places_to_candidate_pair<'tcx>(
-    a: Place<'tcx>,
-    b: Place<'tcx>,
-    body: &Body<'tcx>,
-) -> Option<(Local, Local)> {
-    let (mut a, mut b) = if a.projection.len() == 0 && b.projection.len() == 0 {
-        (a.local, b.local)
-    } else {
-        return None;
-    };
+#[derive(Debug, Default)]
+struct Candidates {
+    /// The set of candidates we are considering in this optimization.
+    ///
+    /// Whether a place ends up in the key or the value does not correspond to whether it appears as
+    /// the lhs or rhs of any assignment. As a matter of fact, the places in here might never appear
+    /// in an assignment at all. This happens because if we see an assignment like this:
+    ///
+    /// ```ignore (syntax-highlighting-only)
+    /// _1.0 = _2.0
+    /// ```
+    ///
+    /// We will still report that we would like to merge `_1` and `_2` in an attempt to allow us to
+    /// remove that assignment.
+    c: Vec<(Local, Local)>,
+}
 
-    // By sorting, we make sure we're input order independent
-    if a > b {
-        std::mem::swap(&mut a, &mut b);
-    }
+// We first implement some utility functions which we will expose removing candidates according to
+// different needs. Throughout the liveness filtering, the `candidates` are only ever accessed
+// through these methods, and not directly.
+impl Candidates {
+    /// Collects the candidates for merging.
+    ///
+    /// This is responsible for enforcing the first and third bullet point.
+    fn find(body: &Body<'_>, borrowed: &DenseBitSet<Local>) -> Candidates {
+        let mut visitor = FindAssignments { body, candidates: Default::default(), borrowed };
+        visitor.visit_body(body);
 
-    // We could now return `(a, b)`, but then we miss some candidates in the case where `a` can't be
-    // used as a `src`.
-    if is_local_required(a, body) {
-        std::mem::swap(&mut a, &mut b);
+        Candidates { c: visitor.candidates }
     }
-    // We could check `is_local_required` again here, but there's no need - after all, we make no
-    // promise that the candidate pair is actually valid
-    Some((a, b))
 }
 
 struct FindAssignments<'a, 'tcx> {
     body: &'a Body<'tcx>,
-    candidates: &'a mut FxIndexMap<Local, Vec<Local>>,
+    candidates: Vec<(Local, Local)>,
     borrowed: &'a DenseBitSet<Local>,
 }
 
@@ -753,11 +402,9 @@ impl<'tcx> Visitor<'tcx> for FindAssignments<'_, 'tcx> {
             lhs,
             Rvalue::CopyForDeref(rhs) | Rvalue::Use(Operand::Copy(rhs) | Operand::Move(rhs)),
         )) = &statement.kind
+            && let Some(src) = lhs.as_local()
+            && let Some(dest) = rhs.as_local()
         {
-            let Some((src, dest)) = places_to_candidate_pair(*lhs, *rhs, self.body) else {
-                return;
-            };
-
             // As described at the top of the file, we do not go near things that have
             // their address taken.
             if self.borrowed.contains(src) || self.borrowed.contains(dest) {
@@ -774,13 +421,8 @@ impl<'tcx> Visitor<'tcx> for FindAssignments<'_, 'tcx> {
                 return;
             }
 
-            // Also, we need to make sure that MIR actually allows the `src` to be removed
-            if is_local_required(src, self.body) {
-                return;
-            }
-
             // We may insert duplicates here, but that's fine
-            self.candidates.entry(src).or_default().push(dest);
+            self.candidates.push((src, dest));
         }
     }
 }
@@ -803,18 +445,165 @@ fn dest_prop_mir_dump<'tcx>(
     tcx: TyCtxt<'tcx>,
     body: &Body<'tcx>,
     points: &DenseLocationMap,
-    live: &SparseIntervalMatrix<Local, PointIndex>,
-    round: usize,
+    live: &SparseIntervalMatrix<RelevantLocal, TwoStepIndex>,
+    relevant: &RelevantLocals,
 ) {
     let locals_live_at = |location| {
-        let location = points.point_from_location(location);
-        live.rows().filter(|&r| live.contains(r, location)).collect::<Vec<_>>()
+        live.rows()
+            .filter(|&r| live.contains(r, location))
+            .map(|rl| relevant.original[rl])
+            .collect::<Vec<_>>()
     };
-    dump_mir(tcx, false, "DestinationPropagation-dataflow", &round, body, |pass_where, w| {
-        if let PassWhere::BeforeLocation(loc) = pass_where {
-            writeln!(w, "        // live: {:?}", locals_live_at(loc))?;
+
+    if let Some(dumper) = MirDumper::new(tcx, "DestinationPropagation-dataflow", body) {
+        let extra_data = &|pass_where, w: &mut dyn std::io::Write| {
+            if let PassWhere::BeforeLocation(loc) = pass_where {
+                let location = TwoStepIndex::new(points, loc, Effect::Before);
+                let live = locals_live_at(location);
+                writeln!(w, "        // before: {:?} => {:?}", location, live)?;
+            }
+            if let PassWhere::AfterLocation(loc) = pass_where {
+                let location = TwoStepIndex::new(points, loc, Effect::After);
+                let live = locals_live_at(location);
+                writeln!(w, "        // after: {:?} => {:?}", location, live)?;
+            }
+            Ok(())
+        };
+
+        dumper.set_extra_data(extra_data).dump_mir(body)
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+enum Effect {
+    Before,
+    After,
+}
+
+rustc_index::newtype_index! {
+    /// A reversed `PointIndex` but with the lower bit encoding early/late inside the statement.
+    /// The reversed order allows to use the more efficient `IntervalSet::append` method while we
+    /// iterate on the statements in reverse order.
+    #[orderable]
+    #[debug_format = "TwoStepIndex({})"]
+    struct TwoStepIndex {}
+}
+
+impl TwoStepIndex {
+    fn new(elements: &DenseLocationMap, location: Location, effect: Effect) -> TwoStepIndex {
+        let point = elements.point_from_location(location);
+        let effect = match effect {
+            Effect::Before => 0,
+            Effect::After => 1,
+        };
+        let max_index = 2 * elements.num_points() as u32 - 1;
+        let index = 2 * point.as_u32() + (effect as u32);
+        // Reverse the indexing to use more efficient `IntervalSet::append`.
+        TwoStepIndex::from_u32(max_index - index)
+    }
+}
+
+/// Add points depending on the result of the given dataflow analysis.
+fn save_as_intervals<'tcx>(
+    elements: &DenseLocationMap,
+    body: &Body<'tcx>,
+    relevant: &RelevantLocals,
+    results: Results<DenseBitSet<Local>>,
+) -> SparseIntervalMatrix<RelevantLocal, TwoStepIndex> {
+    let mut values = SparseIntervalMatrix::new(2 * elements.num_points());
+    let mut state = MaybeLiveLocals.bottom_value(body);
+    let reachable_blocks = traversal::reachable_as_bitset(body);
+
+    let two_step_loc = |location, effect| TwoStepIndex::new(elements, location, effect);
+    let append_at =
+        |values: &mut SparseIntervalMatrix<_, _>, state: &DenseBitSet<Local>, twostep| {
+            for (relevant, &original) in relevant.original.iter_enumerated() {
+                if state.contains(original) {
+                    values.append(relevant, twostep);
+                }
+            }
+        };
+
+    // Iterate blocks in decreasing order, to visit locations in decreasing order. This
+    // allows to use the more efficient `append` method to interval sets.
+    for block in body.basic_blocks.indices().rev() {
+        if !reachable_blocks.contains(block) {
+            continue;
         }
 
-        Ok(())
-    });
+        state.clone_from(&results[block]);
+
+        let block_data = &body.basic_blocks[block];
+        let loc = Location { block, statement_index: block_data.statements.len() };
+
+        let term = block_data.terminator();
+        let mut twostep = two_step_loc(loc, Effect::After);
+        append_at(&mut values, &state, twostep);
+        // Ensure we have a non-zero live range even for dead stores. This is done by marking all
+        // the written-to locals as live in the second half of the statement.
+        // We also ensure that operands read by terminators conflict with writes by that terminator.
+        // For instance a function call may read args after having written to the destination.
+        VisitPlacesWith(|place: Place<'tcx>, ctxt| {
+            if let Some(relevant) = relevant.shrink[place.local] {
+                match DefUse::for_place(place, ctxt) {
+                    DefUse::Def | DefUse::Use | DefUse::PartialWrite => {
+                        values.insert(relevant, twostep);
+                    }
+                    DefUse::NonUse => {}
+                }
+            }
+        })
+        .visit_terminator(term, loc);
+
+        twostep = TwoStepIndex::from_u32(twostep.as_u32() + 1);
+        debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before));
+        MaybeLiveLocals.apply_early_terminator_effect(&mut state, term, loc);
+        MaybeLiveLocals.apply_primary_terminator_effect(&mut state, term, loc);
+        append_at(&mut values, &state, twostep);
+
+        for (statement_index, stmt) in block_data.statements.iter().enumerate().rev() {
+            let loc = Location { block, statement_index };
+            twostep = TwoStepIndex::from_u32(twostep.as_u32() + 1);
+            debug_assert_eq!(twostep, two_step_loc(loc, Effect::After));
+            append_at(&mut values, &state, twostep);
+            // Like terminators, ensure we have a non-zero live range even for dead stores.
+            // Some rvalues interleave reads and writes, for instance `Rvalue::Aggregate`, see
+            // https://github.com/rust-lang/rust/issues/146383. By precaution, treat statements
+            // as behaving so by default.
+            // We make an exception for simple assignments `_a.stuff = {copy|move} _b.stuff`,
+            // as marking `_b` live here would prevent unification.
+            let is_simple_assignment = match stmt.kind {
+                StatementKind::Assign(box (
+                    lhs,
+                    Rvalue::CopyForDeref(rhs)
+                    | Rvalue::Use(Operand::Copy(rhs) | Operand::Move(rhs)),
+                )) => lhs.projection == rhs.projection,
+                _ => false,
+            };
+            VisitPlacesWith(|place: Place<'tcx>, ctxt| {
+                if let Some(relevant) = relevant.shrink[place.local] {
+                    match DefUse::for_place(place, ctxt) {
+                        DefUse::Def | DefUse::PartialWrite => {
+                            values.insert(relevant, twostep);
+                        }
+                        DefUse::Use if !is_simple_assignment => {
+                            values.insert(relevant, twostep);
+                        }
+                        DefUse::Use | DefUse::NonUse => {}
+                    }
+                }
+            })
+            .visit_statement(stmt, loc);
+
+            twostep = TwoStepIndex::from_u32(twostep.as_u32() + 1);
+            debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before));
+            MaybeLiveLocals.apply_early_statement_effect(&mut state, stmt, loc);
+            MaybeLiveLocals.apply_primary_statement_effect(&mut state, stmt, loc);
+            // ... but reads from operands are marked as live here so they do not conflict with
+            // the all the writes we manually marked as live in the second half of the statement.
+            append_at(&mut values, &state, twostep);
+        }
+    }
+
+    values
 }
diff --git a/compiler/rustc_mir_transform/src/elaborate_drop.rs b/compiler/rustc_mir_transform/src/elaborate_drop.rs
index de96b1f255a..4f3c53d761f 100644
--- a/compiler/rustc_mir_transform/src/elaborate_drop.rs
+++ b/compiler/rustc_mir_transform/src/elaborate_drop.rs
@@ -611,6 +611,7 @@ where
     ///
     /// For example, with 3 fields, the drop ladder is
     ///
+    /// ```text
     /// .d0:
     ///     ELAB(drop location.0 [target=.d1, unwind=.c1])
     /// .d1:
@@ -621,8 +622,10 @@ where
     ///     ELAB(drop location.1 [target=.c2])
     /// .c2:
     ///     ELAB(drop location.2 [target=`self.unwind`])
+    /// ```
     ///
     /// For possible-async drops in coroutines we also need dropline ladder
+    /// ```text
     /// .d0 (mainline):
     ///     ELAB(drop location.0 [target=.d1, unwind=.c1, drop=.e1])
     /// .d1 (mainline):
@@ -637,6 +640,7 @@ where
     ///     ELAB(drop location.1 [target=.e2, unwind=.c2])
     /// .e2 (dropline):
     ///     ELAB(drop location.2 [target=`self.drop`, unwind=`self.unwind`])
+    /// ```
     ///
     /// NOTE: this does not clear the master drop flag, so you need
     /// to point succ/unwind on a `drop_ladder_bottom`.
@@ -761,24 +765,37 @@ where
 
         let skip_contents = adt.is_union() || adt.is_manually_drop();
         let contents_drop = if skip_contents {
+            if adt.has_dtor(self.tcx()) && self.elaborator.get_drop_flag(self.path).is_some() {
+                // the top-level drop flag is usually cleared by open_drop_for_adt_contents
+                // types with destructors would still need an empty drop ladder to clear it
+
+                // however, these types are only open dropped in `DropShimElaborator`
+                // which does not have drop flags
+                // a future box-like "DerefMove" trait would allow for this case to happen
+                span_bug!(self.source_info.span, "open dropping partially moved union");
+            }
+
             (self.succ, self.unwind, self.dropline)
         } else {
             self.open_drop_for_adt_contents(adt, args)
         };
 
-        if adt.is_box() {
-            // we need to drop the inside of the box before running the destructor
-            let succ = self.destructor_call_block_sync((contents_drop.0, contents_drop.1));
-            let unwind = contents_drop
-                .1
-                .map(|unwind| self.destructor_call_block_sync((unwind, Unwind::InCleanup)));
-            let dropline = contents_drop
-                .2
-                .map(|dropline| self.destructor_call_block_sync((dropline, contents_drop.1)));
-
-            self.open_drop_for_box_contents(adt, args, succ, unwind, dropline)
-        } else if adt.has_dtor(self.tcx()) {
-            self.destructor_call_block(contents_drop)
+        if adt.has_dtor(self.tcx()) {
+            let destructor_block = if adt.is_box() {
+                // we need to drop the inside of the box before running the destructor
+                let succ = self.destructor_call_block_sync((contents_drop.0, contents_drop.1));
+                let unwind = contents_drop
+                    .1
+                    .map(|unwind| self.destructor_call_block_sync((unwind, Unwind::InCleanup)));
+                let dropline = contents_drop
+                    .2
+                    .map(|dropline| self.destructor_call_block_sync((dropline, contents_drop.1)));
+                self.open_drop_for_box_contents(adt, args, succ, unwind, dropline)
+            } else {
+                self.destructor_call_block(contents_drop)
+            };
+
+            self.drop_flag_test_block(destructor_block, contents_drop.0, contents_drop.1)
         } else {
             contents_drop.0
         }
@@ -982,12 +999,7 @@ where
             unwind.is_cleanup(),
         );
 
-        let destructor_block = self.elaborator.patch().new_block(result);
-
-        let block_start = Location { block: destructor_block, statement_index: 0 };
-        self.elaborator.clear_drop_flag(block_start, self.path, DropFlagMode::Shallow);
-
-        self.drop_flag_test_block(destructor_block, succ, unwind)
+        self.elaborator.patch().new_block(result)
     }
 
     fn destructor_call_block(
@@ -1002,13 +1014,7 @@ where
             && !unwind.is_cleanup()
             && ty.is_async_drop(self.tcx(), self.elaborator.typing_env())
         {
-            let destructor_block =
-                self.build_async_drop(self.place, ty, None, succ, unwind, dropline, true);
-
-            let block_start = Location { block: destructor_block, statement_index: 0 };
-            self.elaborator.clear_drop_flag(block_start, self.path, DropFlagMode::Shallow);
-
-            self.drop_flag_test_block(destructor_block, succ, unwind)
+            self.build_async_drop(self.place, ty, None, succ, unwind, dropline, true)
         } else {
             self.destructor_call_block_sync((succ, unwind))
         }
diff --git a/compiler/rustc_mir_transform/src/elaborate_drops.rs b/compiler/rustc_mir_transform/src/elaborate_drops.rs
index b4fa2be1d00..58dff4514a0 100644
--- a/compiler/rustc_mir_transform/src/elaborate_drops.rs
+++ b/compiler/rustc_mir_transform/src/elaborate_drops.rs
@@ -253,8 +253,8 @@ struct ElaborateDropsCtxt<'a, 'tcx> {
 }
 
 impl fmt::Debug for ElaborateDropsCtxt<'_, '_> {
-    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        Ok(())
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("ElaborateDropsCtxt").finish_non_exhaustive()
     }
 }
 
diff --git a/compiler/rustc_mir_transform/src/errors.rs b/compiler/rustc_mir_transform/src/errors.rs
index cffa0183fa7..775f5f9a7cf 100644
--- a/compiler/rustc_mir_transform/src/errors.rs
+++ b/compiler/rustc_mir_transform/src/errors.rs
@@ -2,6 +2,7 @@ use rustc_errors::codes::*;
 use rustc_errors::{Diag, LintDiagnostic};
 use rustc_macros::{Diagnostic, LintDiagnostic, Subdiagnostic};
 use rustc_middle::mir::AssertKind;
+use rustc_middle::query::Key;
 use rustc_middle::ty::TyCtxt;
 use rustc_session::lint::{self, Lint};
 use rustc_span::def_id::DefId;
@@ -9,6 +10,46 @@ use rustc_span::{Ident, Span, Symbol};
 
 use crate::fluent_generated as fluent;
 
+/// Emit diagnostic for calls to `#[inline(always)]`-annotated functions with a
+/// `#[target_feature]` attribute where the caller enables a different set of target features.
+pub(crate) fn emit_inline_always_target_feature_diagnostic<'a, 'tcx>(
+    tcx: TyCtxt<'tcx>,
+    call_span: Span,
+    callee_def_id: DefId,
+    caller_def_id: DefId,
+    callee_only: &[&'a str],
+) {
+    let callee = tcx.def_path_str(callee_def_id);
+    let caller = tcx.def_path_str(caller_def_id);
+
+    tcx.node_span_lint(
+        lint::builtin::INLINE_ALWAYS_MISMATCHING_TARGET_FEATURES,
+        tcx.local_def_id_to_hir_id(caller_def_id.as_local().unwrap()),
+        call_span,
+        |lint| {
+            lint.primary_message(format!(
+                "call to `#[inline(always)]`-annotated `{callee}` \
+                requires the same target features to be inlined"
+            ));
+            lint.note("function will not be inlined");
+
+            lint.note(format!(
+                "the following target features are on `{callee}` but missing from `{caller}`: {}",
+                callee_only.join(", ")
+            ));
+            lint.span_note(callee_def_id.default_span(tcx), format!("`{callee}` is defined here"));
+
+            let feats = callee_only.join(",");
+            lint.span_suggestion(
+                tcx.def_span(caller_def_id).shrink_to_lo(),
+                format!("add `#[target_feature]` attribute to `{caller}`"),
+                format!("#[target_feature(enable = \"{feats}\")]\n"),
+                lint::Applicability::MaybeIncorrect,
+            );
+        },
+    );
+}
+
 #[derive(LintDiagnostic)]
 #[diag(mir_transform_unconditional_recursion)]
 #[help]
@@ -117,14 +158,6 @@ pub(crate) struct FnItemRef {
     pub ident: Ident,
 }
 
-#[derive(Diagnostic)]
-#[diag(mir_transform_exceeds_mcdc_test_vector_limit)]
-pub(crate) struct MCDCExceedsTestVectorLimit {
-    #[primary_span]
-    pub(crate) span: Span,
-    pub(crate) max_num_test_vectors: usize,
-}
-
 pub(crate) struct MustNotSupend<'a, 'tcx> {
     pub tcx: TyCtxt<'tcx>,
     pub yield_sp: Span,
diff --git a/compiler/rustc_mir_transform/src/gvn.rs b/compiler/rustc_mir_transform/src/gvn.rs
index 6b11706d2b5..ebec3d12500 100644
--- a/compiler/rustc_mir_transform/src/gvn.rs
+++ b/compiler/rustc_mir_transform/src/gvn.rs
@@ -85,15 +85,18 @@
 //! that contain `AllocId`s.
 
 use std::borrow::Cow;
+use std::hash::{Hash, Hasher};
 
 use either::Either;
+use hashbrown::hash_table::{Entry, HashTable};
+use itertools::Itertools as _;
 use rustc_abi::{self as abi, BackendRepr, FIRST_VARIANT, FieldIdx, Primitive, Size, VariantIdx};
 use rustc_const_eval::const_eval::DummyMachine;
 use rustc_const_eval::interpret::{
     ImmTy, Immediate, InterpCx, MemPlaceMeta, MemoryKind, OpTy, Projectable, Scalar,
     intern_const_alloc_for_constprop,
 };
-use rustc_data_structures::fx::{FxIndexSet, MutableValues};
+use rustc_data_structures::fx::FxHasher;
 use rustc_data_structures::graph::dominators::Dominators;
 use rustc_hir::def::DefKind;
 use rustc_index::bit_set::DenseBitSet;
@@ -105,7 +108,6 @@ use rustc_middle::mir::*;
 use rustc_middle::ty::layout::HasTypingEnv;
 use rustc_middle::ty::{self, Ty, TyCtxt};
 use rustc_span::DUMMY_SP;
-use rustc_span::def_id::DefId;
 use smallvec::SmallVec;
 use tracing::{debug, instrument, trace};
 
@@ -130,7 +132,7 @@ impl<'tcx> crate::MirPass<'tcx> for GVN {
         let mut state = VnState::new(tcx, body, typing_env, &ssa, dominators, &body.local_decls);
 
         for local in body.args_iter().filter(|&local| ssa.is_ssa(local)) {
-            let opaque = state.new_opaque();
+            let opaque = state.new_opaque(body.local_decls[local].ty);
             state.assign(local, opaque);
         }
 
@@ -152,23 +154,27 @@ impl<'tcx> crate::MirPass<'tcx> for GVN {
 }
 
 newtype_index! {
+    /// This represents a `Value` in the symbolic execution.
+    #[debug_format = "_v{}"]
     struct VnIndex {}
 }
 
-/// Computing the aggregate's type can be quite slow, so we only keep the minimal amount of
-/// information to reconstruct it when needed.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
-enum AggregateTy<'tcx> {
-    /// Invariant: this must not be used for an empty array.
-    Array,
-    Tuple,
-    Def(DefId, ty::GenericArgsRef<'tcx>),
-    RawPtr {
-        /// Needed for cast propagation.
-        data_pointer_ty: Ty<'tcx>,
-        /// The data pointer can be anything thin, so doesn't determine the output.
-        output_pointer_ty: Ty<'tcx>,
-    },
+/// Marker type to forbid hashing and comparing opaque values.
+/// This struct should only be constructed by `ValueSet::insert_unique` to ensure we use that
+/// method to create non-unifiable values. It will ICE if used in `ValueSet::insert`.
+#[derive(Copy, Clone, Debug, Eq)]
+struct VnOpaque;
+impl PartialEq for VnOpaque {
+    fn eq(&self, _: &VnOpaque) -> bool {
+        // ICE if we try to compare unique values
+        unreachable!()
+    }
+}
+impl Hash for VnOpaque {
+    fn hash<T: Hasher>(&self, _: &mut T) {
+        // ICE if we try to hash unique values
+        unreachable!()
+    }
 }
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
@@ -182,18 +188,27 @@ enum Value<'tcx> {
     // Root values.
     /// Used to represent values we know nothing about.
     /// The `usize` is a counter incremented by `new_opaque`.
-    Opaque(usize),
+    Opaque(VnOpaque),
     /// Evaluated or unevaluated constant value.
     Constant {
         value: Const<'tcx>,
         /// Some constants do not have a deterministic value. To avoid merging two instances of the
         /// same `Const`, we assign them an additional integer index.
-        // `disambiguator` is 0 iff the constant is deterministic.
-        disambiguator: usize,
+        // `disambiguator` is `None` iff the constant is deterministic.
+        disambiguator: Option<VnOpaque>,
     },
+
+    // Aggregates.
     /// An aggregate value, either tuple/closure/struct/enum.
     /// This does not contain unions, as we cannot reason with the value.
-    Aggregate(AggregateTy<'tcx>, VariantIdx, Vec<VnIndex>),
+    Aggregate(VariantIdx, Vec<VnIndex>),
+    /// A raw pointer aggregate built from a thin pointer and metadata.
+    RawPtr {
+        /// Thin pointer component. This is field 0 in MIR.
+        pointer: VnIndex,
+        /// Metadata component. This is field 1 in MIR.
+        metadata: VnIndex,
+    },
     /// This corresponds to a `[value; count]` expression.
     Repeat(VnIndex, ty::Const<'tcx>),
     /// The address of a place.
@@ -201,16 +216,14 @@ enum Value<'tcx> {
         place: Place<'tcx>,
         kind: AddressKind,
         /// Give each borrow and pointer a different provenance, so we don't merge them.
-        provenance: usize,
+        provenance: VnOpaque,
     },
 
     // Extractions.
     /// This is the *value* obtained by projecting another value.
-    Projection(VnIndex, ProjectionElem<VnIndex, Ty<'tcx>>),
+    Projection(VnIndex, ProjectionElem<VnIndex, ()>),
     /// Discriminant of the given value.
     Discriminant(VnIndex),
-    /// Length of an array or slice.
-    Len(VnIndex),
 
     // Operations.
     NullaryOp(NullOp<'tcx>, Ty<'tcx>),
@@ -219,25 +232,123 @@ enum Value<'tcx> {
     Cast {
         kind: CastKind,
         value: VnIndex,
-        from: Ty<'tcx>,
-        to: Ty<'tcx>,
     },
 }
 
+/// Stores and deduplicates pairs of `(Value, Ty)` into in `VnIndex` numbered values.
+///
+/// This data structure is mostly a partial reimplementation of `FxIndexMap<VnIndex, (Value, Ty)>`.
+/// We do not use a regular `FxIndexMap` to skip hashing values that are unique by construction,
+/// like opaque values, address with provenance and non-deterministic constants.
+struct ValueSet<'tcx> {
+    indices: HashTable<VnIndex>,
+    hashes: IndexVec<VnIndex, u64>,
+    values: IndexVec<VnIndex, Value<'tcx>>,
+    types: IndexVec<VnIndex, Ty<'tcx>>,
+}
+
+impl<'tcx> ValueSet<'tcx> {
+    fn new(num_values: usize) -> ValueSet<'tcx> {
+        ValueSet {
+            indices: HashTable::with_capacity(num_values),
+            hashes: IndexVec::with_capacity(num_values),
+            values: IndexVec::with_capacity(num_values),
+            types: IndexVec::with_capacity(num_values),
+        }
+    }
+
+    /// Insert a `(Value, Ty)` pair without hashing or deduplication.
+    /// This always creates a new `VnIndex`.
+    #[inline]
+    fn insert_unique(
+        &mut self,
+        ty: Ty<'tcx>,
+        value: impl FnOnce(VnOpaque) -> Value<'tcx>,
+    ) -> VnIndex {
+        let value = value(VnOpaque);
+
+        debug_assert!(match value {
+            Value::Opaque(_) | Value::Address { .. } => true,
+            Value::Constant { disambiguator, .. } => disambiguator.is_some(),
+            _ => false,
+        });
+
+        let index = self.hashes.push(0);
+        let _index = self.types.push(ty);
+        debug_assert_eq!(index, _index);
+        let _index = self.values.push(value);
+        debug_assert_eq!(index, _index);
+        index
+    }
+
+    /// Insert a `(Value, Ty)` pair to be deduplicated.
+    /// Returns `true` as second tuple field if this value did not exist previously.
+    #[allow(rustc::pass_by_value)] // closures take `&VnIndex`
+    fn insert(&mut self, ty: Ty<'tcx>, value: Value<'tcx>) -> (VnIndex, bool) {
+        debug_assert!(match value {
+            Value::Opaque(_) | Value::Address { .. } => false,
+            Value::Constant { disambiguator, .. } => disambiguator.is_none(),
+            _ => true,
+        });
+
+        let hash: u64 = {
+            let mut h = FxHasher::default();
+            value.hash(&mut h);
+            ty.hash(&mut h);
+            h.finish()
+        };
+
+        let eq = |index: &VnIndex| self.values[*index] == value && self.types[*index] == ty;
+        let hasher = |index: &VnIndex| self.hashes[*index];
+        match self.indices.entry(hash, eq, hasher) {
+            Entry::Occupied(entry) => {
+                let index = *entry.get();
+                (index, false)
+            }
+            Entry::Vacant(entry) => {
+                let index = self.hashes.push(hash);
+                entry.insert(index);
+                let _index = self.values.push(value);
+                debug_assert_eq!(index, _index);
+                let _index = self.types.push(ty);
+                debug_assert_eq!(index, _index);
+                (index, true)
+            }
+        }
+    }
+
+    /// Return the `Value` associated with the given `VnIndex`.
+    #[inline]
+    fn value(&self, index: VnIndex) -> &Value<'tcx> {
+        &self.values[index]
+    }
+
+    /// Return the type associated with the given `VnIndex`.
+    #[inline]
+    fn ty(&self, index: VnIndex) -> Ty<'tcx> {
+        self.types[index]
+    }
+
+    /// Replace the value associated with `index` with an opaque value.
+    #[inline]
+    fn forget(&mut self, index: VnIndex) {
+        self.values[index] = Value::Opaque(VnOpaque);
+    }
+}
+
 struct VnState<'body, 'tcx> {
     tcx: TyCtxt<'tcx>,
     ecx: InterpCx<'tcx, DummyMachine>,
     local_decls: &'body LocalDecls<'tcx>,
+    is_coroutine: bool,
     /// Value stored in each local.
     locals: IndexVec<Local, Option<VnIndex>>,
     /// Locals that are assigned that value.
     // This vector does not hold all the values of `VnIndex` that we create.
     rev_locals: IndexVec<VnIndex, SmallVec<[Local; 1]>>,
-    values: FxIndexSet<Value<'tcx>>,
+    values: ValueSet<'tcx>,
     /// Values evaluated as constants if possible.
     evaluated: IndexVec<VnIndex, Option<OpTy<'tcx>>>,
-    /// Counter to generate different values.
-    next_opaque: usize,
     /// Cache the deref values.
     derefs: Vec<VnIndex>,
     ssa: &'body SsaLocals,
@@ -265,11 +376,11 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             tcx,
             ecx: InterpCx::new(tcx, DUMMY_SP, typing_env, DummyMachine),
             local_decls,
+            is_coroutine: body.coroutine.is_some(),
             locals: IndexVec::from_elem(None, local_decls),
             rev_locals: IndexVec::with_capacity(num_values),
-            values: FxIndexSet::with_capacity_and_hasher(num_values, Default::default()),
+            values: ValueSet::new(num_values),
             evaluated: IndexVec::with_capacity(num_values),
-            next_opaque: 1,
             derefs: Vec::new(),
             ssa,
             dominators,
@@ -282,9 +393,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
     }
 
     #[instrument(level = "trace", skip(self), ret)]
-    fn insert(&mut self, value: Value<'tcx>) -> VnIndex {
-        let (index, new) = self.values.insert_full(value);
-        let index = VnIndex::from_usize(index);
+    fn insert(&mut self, ty: Ty<'tcx>, value: Value<'tcx>) -> VnIndex {
+        let (index, new) = self.values.insert(ty, value);
         if new {
             // Grow `evaluated` and `rev_locals` here to amortize the allocations.
             let evaluated = self.eval_to_const(index);
@@ -296,29 +406,71 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
         index
     }
 
-    fn next_opaque(&mut self) -> usize {
-        let next_opaque = self.next_opaque;
-        self.next_opaque += 1;
-        next_opaque
-    }
-
     /// Create a new `Value` for which we have no information at all, except that it is distinct
     /// from all the others.
     #[instrument(level = "trace", skip(self), ret)]
-    fn new_opaque(&mut self) -> VnIndex {
-        let value = Value::Opaque(self.next_opaque());
-        self.insert(value)
+    fn new_opaque(&mut self, ty: Ty<'tcx>) -> VnIndex {
+        let index = self.values.insert_unique(ty, Value::Opaque);
+        let _index = self.evaluated.push(None);
+        debug_assert_eq!(index, _index);
+        let _index = self.rev_locals.push(SmallVec::new());
+        debug_assert_eq!(index, _index);
+        index
     }
 
     /// Create a new `Value::Address` distinct from all the others.
     #[instrument(level = "trace", skip(self), ret)]
     fn new_pointer(&mut self, place: Place<'tcx>, kind: AddressKind) -> VnIndex {
-        let value = Value::Address { place, kind, provenance: self.next_opaque() };
-        self.insert(value)
+        let pty = place.ty(self.local_decls, self.tcx).ty;
+        let ty = match kind {
+            AddressKind::Ref(bk) => {
+                Ty::new_ref(self.tcx, self.tcx.lifetimes.re_erased, pty, bk.to_mutbl_lossy())
+            }
+            AddressKind::Address(mutbl) => Ty::new_ptr(self.tcx, pty, mutbl.to_mutbl_lossy()),
+        };
+        let index =
+            self.values.insert_unique(ty, |provenance| Value::Address { place, kind, provenance });
+        let evaluated = self.eval_to_const(index);
+        let _index = self.evaluated.push(evaluated);
+        debug_assert_eq!(index, _index);
+        let _index = self.rev_locals.push(SmallVec::new());
+        debug_assert_eq!(index, _index);
+        index
     }
 
+    #[instrument(level = "trace", skip(self), ret)]
+    fn insert_constant(&mut self, value: Const<'tcx>) -> VnIndex {
+        let (index, new) = if value.is_deterministic() {
+            // The constant is deterministic, no need to disambiguate.
+            let constant = Value::Constant { value, disambiguator: None };
+            self.values.insert(value.ty(), constant)
+        } else {
+            // Multiple mentions of this constant will yield different values,
+            // so assign a different `disambiguator` to ensure they do not get the same `VnIndex`.
+            let index = self.values.insert_unique(value.ty(), |disambiguator| Value::Constant {
+                value,
+                disambiguator: Some(disambiguator),
+            });
+            (index, true)
+        };
+        if new {
+            let evaluated = self.eval_to_const(index);
+            let _index = self.evaluated.push(evaluated);
+            debug_assert_eq!(index, _index);
+            let _index = self.rev_locals.push(SmallVec::new());
+            debug_assert_eq!(index, _index);
+        }
+        index
+    }
+
+    #[inline]
     fn get(&self, index: VnIndex) -> &Value<'tcx> {
-        self.values.get_index(index.as_usize()).unwrap()
+        self.values.value(index)
+    }
+
+    #[inline]
+    fn ty(&self, index: VnIndex) -> Ty<'tcx> {
+        self.values.ty(index)
     }
 
     /// Record that `local` is assigned `value`. `local` must be SSA.
@@ -329,56 +481,49 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
         self.rev_locals[value].push(local);
     }
 
-    fn insert_constant(&mut self, value: Const<'tcx>) -> VnIndex {
-        let disambiguator = if value.is_deterministic() {
-            // The constant is deterministic, no need to disambiguate.
-            0
-        } else {
-            // Multiple mentions of this constant will yield different values,
-            // so assign a different `disambiguator` to ensure they do not get the same `VnIndex`.
-            let disambiguator = self.next_opaque();
-            // `disambiguator: 0` means deterministic.
-            debug_assert_ne!(disambiguator, 0);
-            disambiguator
-        };
-        self.insert(Value::Constant { value, disambiguator })
-    }
-
     fn insert_bool(&mut self, flag: bool) -> VnIndex {
         // Booleans are deterministic.
         let value = Const::from_bool(self.tcx, flag);
         debug_assert!(value.is_deterministic());
-        self.insert(Value::Constant { value, disambiguator: 0 })
+        self.insert(self.tcx.types.bool, Value::Constant { value, disambiguator: None })
     }
 
-    fn insert_scalar(&mut self, scalar: Scalar, ty: Ty<'tcx>) -> VnIndex {
+    fn insert_scalar(&mut self, ty: Ty<'tcx>, scalar: Scalar) -> VnIndex {
         // Scalars are deterministic.
         let value = Const::from_scalar(self.tcx, scalar, ty);
         debug_assert!(value.is_deterministic());
-        self.insert(Value::Constant { value, disambiguator: 0 })
+        self.insert(ty, Value::Constant { value, disambiguator: None })
     }
 
-    fn insert_tuple(&mut self, values: Vec<VnIndex>) -> VnIndex {
-        self.insert(Value::Aggregate(AggregateTy::Tuple, VariantIdx::ZERO, values))
+    fn insert_tuple(&mut self, ty: Ty<'tcx>, values: Vec<VnIndex>) -> VnIndex {
+        self.insert(ty, Value::Aggregate(VariantIdx::ZERO, values))
     }
 
-    fn insert_deref(&mut self, value: VnIndex) -> VnIndex {
-        let value = self.insert(Value::Projection(value, ProjectionElem::Deref));
+    fn insert_deref(&mut self, ty: Ty<'tcx>, value: VnIndex) -> VnIndex {
+        let value = self.insert(ty, Value::Projection(value, ProjectionElem::Deref));
         self.derefs.push(value);
         value
     }
 
     fn invalidate_derefs(&mut self) {
         for deref in std::mem::take(&mut self.derefs) {
-            let opaque = self.next_opaque();
-            *self.values.get_index_mut2(deref.index()).unwrap() = Value::Opaque(opaque);
+            self.values.forget(deref);
         }
     }
 
     #[instrument(level = "trace", skip(self), ret)]
     fn eval_to_const(&mut self, value: VnIndex) -> Option<OpTy<'tcx>> {
         use Value::*;
+        let ty = self.ty(value);
+        // Avoid computing layouts inside a coroutine, as that can cause cycles.
+        let ty = if !self.is_coroutine || ty.is_scalar() {
+            self.ecx.layout_of(ty).ok()?
+        } else {
+            return None;
+        };
         let op = match *self.get(value) {
+            _ if ty.is_zst() => ImmTy::uninit(ty).into(),
+
             Opaque(_) => return None,
             // Do not bother evaluating repeat expressions. This would uselessly consume memory.
             Repeat(..) => return None,
@@ -386,42 +531,14 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             Constant { ref value, disambiguator: _ } => {
                 self.ecx.eval_mir_constant(value, DUMMY_SP, None).discard_err()?
             }
-            Aggregate(kind, variant, ref fields) => {
+            Aggregate(variant, ref fields) => {
                 let fields = fields
                     .iter()
                     .map(|&f| self.evaluated[f].as_ref())
                     .collect::<Option<Vec<_>>>()?;
-                let ty = match kind {
-                    AggregateTy::Array => {
-                        assert!(fields.len() > 0);
-                        Ty::new_array(self.tcx, fields[0].layout.ty, fields.len() as u64)
-                    }
-                    AggregateTy::Tuple => {
-                        Ty::new_tup_from_iter(self.tcx, fields.iter().map(|f| f.layout.ty))
-                    }
-                    AggregateTy::Def(def_id, args) => {
-                        self.tcx.type_of(def_id).instantiate(self.tcx, args)
-                    }
-                    AggregateTy::RawPtr { output_pointer_ty, .. } => output_pointer_ty,
-                };
-                let variant = if ty.is_enum() { Some(variant) } else { None };
-                let ty = self.ecx.layout_of(ty).ok()?;
-                if ty.is_zst() {
-                    ImmTy::uninit(ty).into()
-                } else if matches!(kind, AggregateTy::RawPtr { .. }) {
-                    // Pointers don't have fields, so don't `project_field` them.
-                    let data = self.ecx.read_pointer(fields[0]).discard_err()?;
-                    let meta = if fields[1].layout.is_zst() {
-                        MemPlaceMeta::None
-                    } else {
-                        MemPlaceMeta::Meta(self.ecx.read_scalar(fields[1]).discard_err()?)
-                    };
-                    let ptr_imm = Immediate::new_pointer_with_meta(data, meta, &self.ecx);
-                    ImmTy::from_immediate(ptr_imm, ty).into()
-                } else if matches!(
-                    ty.backend_repr,
-                    BackendRepr::Scalar(..) | BackendRepr::ScalarPair(..)
-                ) {
+                let variant = if ty.ty.is_enum() { Some(variant) } else { None };
+                if matches!(ty.backend_repr, BackendRepr::Scalar(..) | BackendRepr::ScalarPair(..))
+                {
                     let dest = self.ecx.allocate(ty, MemoryKind::Stack).discard_err()?;
                     let variant_dest = if let Some(variant) = variant {
                         self.ecx.project_downcast(&dest, variant).discard_err()?
@@ -446,60 +563,43 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                     return None;
                 }
             }
+            RawPtr { pointer, metadata } => {
+                let pointer = self.evaluated[pointer].as_ref()?;
+                let metadata = self.evaluated[metadata].as_ref()?;
+
+                // Pointers don't have fields, so don't `project_field` them.
+                let data = self.ecx.read_pointer(pointer).discard_err()?;
+                let meta = if metadata.layout.is_zst() {
+                    MemPlaceMeta::None
+                } else {
+                    MemPlaceMeta::Meta(self.ecx.read_scalar(metadata).discard_err()?)
+                };
+                let ptr_imm = Immediate::new_pointer_with_meta(data, meta, &self.ecx);
+                ImmTy::from_immediate(ptr_imm, ty).into()
+            }
 
             Projection(base, elem) => {
-                let value = self.evaluated[base].as_ref()?;
-                let elem = match elem {
-                    ProjectionElem::Deref => ProjectionElem::Deref,
-                    ProjectionElem::Downcast(name, read_variant) => {
-                        ProjectionElem::Downcast(name, read_variant)
-                    }
-                    ProjectionElem::Field(f, ty) => ProjectionElem::Field(f, ty),
-                    ProjectionElem::ConstantIndex { offset, min_length, from_end } => {
-                        ProjectionElem::ConstantIndex { offset, min_length, from_end }
-                    }
-                    ProjectionElem::Subslice { from, to, from_end } => {
-                        ProjectionElem::Subslice { from, to, from_end }
-                    }
-                    ProjectionElem::OpaqueCast(ty) => ProjectionElem::OpaqueCast(ty),
-                    ProjectionElem::Subtype(ty) => ProjectionElem::Subtype(ty),
-                    ProjectionElem::UnwrapUnsafeBinder(ty) => {
-                        ProjectionElem::UnwrapUnsafeBinder(ty)
-                    }
-                    // This should have been replaced by a `ConstantIndex` earlier.
-                    ProjectionElem::Index(_) => return None,
-                };
-                self.ecx.project(value, elem).discard_err()?
+                let base = self.evaluated[base].as_ref()?;
+                // `Index` by constants should have been replaced by `ConstantIndex` by
+                // `simplify_place_projection`.
+                let elem = elem.try_map(|_| None, |()| ty.ty)?;
+                self.ecx.project(base, elem).discard_err()?
             }
-            Address { place, kind, provenance: _ } => {
+            Address { place, kind: _, provenance: _ } => {
                 if !place.is_indirect_first_projection() {
                     return None;
                 }
                 let local = self.locals[place.local]?;
                 let pointer = self.evaluated[local].as_ref()?;
                 let mut mplace = self.ecx.deref_pointer(pointer).discard_err()?;
-                for proj in place.projection.iter().skip(1) {
-                    // We have no call stack to associate a local with a value, so we cannot
-                    // interpret indexing.
-                    if matches!(proj, ProjectionElem::Index(_)) {
-                        return None;
-                    }
-                    mplace = self.ecx.project(&mplace, proj).discard_err()?;
+                for elem in place.projection.iter().skip(1) {
+                    // `Index` by constants should have been replaced by `ConstantIndex` by
+                    // `simplify_place_projection`.
+                    let elem = elem.try_map(|_| None, |ty| ty)?;
+                    mplace = self.ecx.project(&mplace, elem).discard_err()?;
                 }
                 let pointer = mplace.to_ref(&self.ecx);
-                let ty = match kind {
-                    AddressKind::Ref(bk) => Ty::new_ref(
-                        self.tcx,
-                        self.tcx.lifetimes.re_erased,
-                        mplace.layout.ty,
-                        bk.to_mutbl_lossy(),
-                    ),
-                    AddressKind::Address(mutbl) => {
-                        Ty::new_ptr(self.tcx, mplace.layout.ty, mutbl.to_mutbl_lossy())
-                    }
-                };
-                let layout = self.ecx.layout_of(ty).ok()?;
-                ImmTy::from_immediate(pointer, layout).into()
+                ImmTy::from_immediate(pointer, ty).into()
             }
 
             Discriminant(base) => {
@@ -509,34 +609,25 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                     self.ecx.discriminant_for_variant(base.layout.ty, variant).discard_err()?;
                 discr_value.into()
             }
-            Len(slice) => {
-                let slice = self.evaluated[slice].as_ref()?;
-                let usize_layout = self.ecx.layout_of(self.tcx.types.usize).unwrap();
-                let len = slice.len(&self.ecx).discard_err()?;
-                let imm = ImmTy::from_uint(len, usize_layout);
-                imm.into()
-            }
-            NullaryOp(null_op, ty) => {
-                let layout = self.ecx.layout_of(ty).ok()?;
+            NullaryOp(null_op, arg_ty) => {
+                let arg_layout = self.ecx.layout_of(arg_ty).ok()?;
                 if let NullOp::SizeOf | NullOp::AlignOf = null_op
-                    && layout.is_unsized()
+                    && arg_layout.is_unsized()
                 {
                     return None;
                 }
                 let val = match null_op {
-                    NullOp::SizeOf => layout.size.bytes(),
-                    NullOp::AlignOf => layout.align.abi.bytes(),
+                    NullOp::SizeOf => arg_layout.size.bytes(),
+                    NullOp::AlignOf => arg_layout.align.abi.bytes(),
                     NullOp::OffsetOf(fields) => self
                         .ecx
                         .tcx
-                        .offset_of_subfield(self.typing_env(), layout, fields.iter())
+                        .offset_of_subfield(self.typing_env(), arg_layout, fields.iter())
                         .bytes(),
                     NullOp::UbChecks => return None,
                     NullOp::ContractChecks => return None,
                 };
-                let usize_layout = self.ecx.layout_of(self.tcx.types.usize).unwrap();
-                let imm = ImmTy::from_uint(val, usize_layout);
-                imm.into()
+                ImmTy::from_uint(val, ty).into()
             }
             UnaryOp(un_op, operand) => {
                 let operand = self.evaluated[operand].as_ref()?;
@@ -552,30 +643,27 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                 let val = self.ecx.binary_op(bin_op, &lhs, &rhs).discard_err()?;
                 val.into()
             }
-            Cast { kind, value, from: _, to } => match kind {
+            Cast { kind, value } => match kind {
                 CastKind::IntToInt | CastKind::IntToFloat => {
                     let value = self.evaluated[value].as_ref()?;
                     let value = self.ecx.read_immediate(value).discard_err()?;
-                    let to = self.ecx.layout_of(to).ok()?;
-                    let res = self.ecx.int_to_int_or_float(&value, to).discard_err()?;
+                    let res = self.ecx.int_to_int_or_float(&value, ty).discard_err()?;
                     res.into()
                 }
                 CastKind::FloatToFloat | CastKind::FloatToInt => {
                     let value = self.evaluated[value].as_ref()?;
                     let value = self.ecx.read_immediate(value).discard_err()?;
-                    let to = self.ecx.layout_of(to).ok()?;
-                    let res = self.ecx.float_to_float_or_int(&value, to).discard_err()?;
+                    let res = self.ecx.float_to_float_or_int(&value, ty).discard_err()?;
                     res.into()
                 }
                 CastKind::Transmute => {
                     let value = self.evaluated[value].as_ref()?;
-                    let to = self.ecx.layout_of(to).ok()?;
                     // `offset` for immediates generally only supports projections that match the
                     // type of the immediate. However, as a HACK, we exploit that it can also do
                     // limited transmutes: it only works between types with the same layout, and
                     // cannot transmute pointers to integers.
                     if value.as_mplace_or_imm().is_right() {
-                        let can_transmute = match (value.layout.backend_repr, to.backend_repr) {
+                        let can_transmute = match (value.layout.backend_repr, ty.backend_repr) {
                             (BackendRepr::Scalar(s1), BackendRepr::Scalar(s2)) => {
                                 s1.size(&self.ecx) == s2.size(&self.ecx)
                                     && !matches!(s1.primitive(), Primitive::Pointer(..))
@@ -595,13 +683,12 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                             return None;
                         }
                     }
-                    value.offset(Size::ZERO, to, &self.ecx).discard_err()?
+                    value.offset(Size::ZERO, ty, &self.ecx).discard_err()?
                 }
                 CastKind::PointerCoercion(ty::adjustment::PointerCoercion::Unsize, _) => {
                     let src = self.evaluated[value].as_ref()?;
-                    let to = self.ecx.layout_of(to).ok()?;
-                    let dest = self.ecx.allocate(to, MemoryKind::Stack).discard_err()?;
-                    self.ecx.unsize_into(src, to, &dest.clone().into()).discard_err()?;
+                    let dest = self.ecx.allocate(ty, MemoryKind::Stack).discard_err()?;
+                    self.ecx.unsize_into(src, ty, &dest).discard_err()?;
                     self.ecx
                         .alloc_mark_immutable(dest.ptr().provenance.unwrap().alloc_id())
                         .discard_err()?;
@@ -610,15 +697,13 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                 CastKind::FnPtrToPtr | CastKind::PtrToPtr => {
                     let src = self.evaluated[value].as_ref()?;
                     let src = self.ecx.read_immediate(src).discard_err()?;
-                    let to = self.ecx.layout_of(to).ok()?;
-                    let ret = self.ecx.ptr_to_ptr(&src, to).discard_err()?;
+                    let ret = self.ecx.ptr_to_ptr(&src, ty).discard_err()?;
                     ret.into()
                 }
                 CastKind::PointerCoercion(ty::adjustment::PointerCoercion::UnsafeFnPointer, _) => {
                     let src = self.evaluated[value].as_ref()?;
                     let src = self.ecx.read_immediate(src).discard_err()?;
-                    let to = self.ecx.layout_of(to).ok()?;
-                    ImmTy::from_immediate(*src, to).into()
+                    ImmTy::from_immediate(*src, ty).into()
                 }
                 _ => return None,
             },
@@ -628,31 +713,30 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
 
     fn project(
         &mut self,
-        place: PlaceRef<'tcx>,
+        place_ty: PlaceTy<'tcx>,
         value: VnIndex,
         proj: PlaceElem<'tcx>,
         from_non_ssa_index: &mut bool,
-    ) -> Option<VnIndex> {
+    ) -> Option<(PlaceTy<'tcx>, VnIndex)> {
+        let projection_ty = place_ty.projection_ty(self.tcx, proj);
         let proj = match proj {
             ProjectionElem::Deref => {
-                let ty = place.ty(self.local_decls, self.tcx).ty;
-                if let Some(Mutability::Not) = ty.ref_mutability()
-                    && let Some(pointee_ty) = ty.builtin_deref(true)
-                    && pointee_ty.is_freeze(self.tcx, self.typing_env())
+                if let Some(Mutability::Not) = place_ty.ty.ref_mutability()
+                    && projection_ty.ty.is_freeze(self.tcx, self.typing_env())
                 {
                     // An immutable borrow `_x` always points to the same value for the
                     // lifetime of the borrow, so we can merge all instances of `*_x`.
-                    return Some(self.insert_deref(value));
+                    return Some((projection_ty, self.insert_deref(projection_ty.ty, value)));
                 } else {
                     return None;
                 }
             }
             ProjectionElem::Downcast(name, index) => ProjectionElem::Downcast(name, index),
-            ProjectionElem::Field(f, ty) => {
-                if let Value::Aggregate(_, _, fields) = self.get(value) {
-                    return Some(fields[f.as_usize()]);
+            ProjectionElem::Field(f, _) => {
+                if let Value::Aggregate(_, fields) = self.get(value) {
+                    return Some((projection_ty, fields[f.as_usize()]));
                 } else if let Value::Projection(outer_value, ProjectionElem::Downcast(_, read_variant)) = self.get(value)
-                    && let Value::Aggregate(_, written_variant, fields) = self.get(*outer_value)
+                    && let Value::Aggregate(written_variant, fields) = self.get(*outer_value)
                     // This pass is not aware of control-flow, so we do not know whether the
                     // replacement we are doing is actually reachable. We could be in any arm of
                     // ```
@@ -670,14 +754,14 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                     // a downcast to an inactive variant.
                     && written_variant == read_variant
                 {
-                    return Some(fields[f.as_usize()]);
+                    return Some((projection_ty, fields[f.as_usize()]));
                 }
-                ProjectionElem::Field(f, ty)
+                ProjectionElem::Field(f, ())
             }
             ProjectionElem::Index(idx) => {
                 if let Value::Repeat(inner, _) = self.get(value) {
                     *from_non_ssa_index |= self.locals[idx].is_none();
-                    return Some(*inner);
+                    return Some((projection_ty, *inner));
                 }
                 let idx = self.locals[idx]?;
                 ProjectionElem::Index(idx)
@@ -685,15 +769,16 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             ProjectionElem::ConstantIndex { offset, min_length, from_end } => {
                 match self.get(value) {
                     Value::Repeat(inner, _) => {
-                        return Some(*inner);
+                        return Some((projection_ty, *inner));
                     }
-                    Value::Aggregate(AggregateTy::Array, _, operands) => {
+                    Value::Aggregate(_, operands) => {
                         let offset = if from_end {
                             operands.len() - offset as usize
                         } else {
                             offset as usize
                         };
-                        return operands.get(offset).copied();
+                        let value = operands.get(offset).copied()?;
+                        return Some((projection_ty, value));
                     }
                     _ => {}
                 };
@@ -702,12 +787,13 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             ProjectionElem::Subslice { from, to, from_end } => {
                 ProjectionElem::Subslice { from, to, from_end }
             }
-            ProjectionElem::OpaqueCast(ty) => ProjectionElem::OpaqueCast(ty),
-            ProjectionElem::Subtype(ty) => ProjectionElem::Subtype(ty),
-            ProjectionElem::UnwrapUnsafeBinder(ty) => ProjectionElem::UnwrapUnsafeBinder(ty),
+            ProjectionElem::OpaqueCast(_) => ProjectionElem::OpaqueCast(()),
+            ProjectionElem::Subtype(_) => ProjectionElem::Subtype(()),
+            ProjectionElem::UnwrapUnsafeBinder(_) => ProjectionElem::UnwrapUnsafeBinder(()),
         };
 
-        Some(self.insert(Value::Projection(value, proj)))
+        let value = self.insert(projection_ty.ty, Value::Projection(value, proj));
+        Some((projection_ty, value))
     }
 
     /// Simplify the projection chain if we know better.
@@ -769,6 +855,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
 
         // Invariant: `value` holds the value up-to the `index`th projection excluded.
         let mut value = self.locals[place.local]?;
+        // Invariant: `value` has type `place_ty`, with optional downcast variant if needed.
+        let mut place_ty = PlaceTy::from_ty(self.local_decls[place.local].ty);
         let mut from_non_ssa_index = false;
         for (index, proj) in place.projection.iter().enumerate() {
             if let Value::Projection(pointer, ProjectionElem::Deref) = *self.get(value)
@@ -777,7 +865,13 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                 && let Some(v) = self.simplify_place_value(&mut pointee, location)
             {
                 value = v;
-                place_ref = pointee.project_deeper(&place.projection[index..], self.tcx).as_ref();
+                // `pointee` holds a `Place`, so `ProjectionElem::Index` holds a `Local`.
+                // That local is SSA, but we otherwise have no guarantee on that local's value at
+                // the current location compared to its value where `pointee` was borrowed.
+                if pointee.projection.iter().all(|elem| !matches!(elem, ProjectionElem::Index(_))) {
+                    place_ref =
+                        pointee.project_deeper(&place.projection[index..], self.tcx).as_ref();
+                }
             }
             if let Some(local) = self.try_as_local(value, location) {
                 // Both `local` and `Place { local: place.local, projection: projection[..index] }`
@@ -786,8 +880,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                 place_ref = PlaceRef { local, projection: &place.projection[index..] };
             }
 
-            let base = PlaceRef { local: place.local, projection: &place.projection[..index] };
-            value = self.project(base, value, proj, &mut from_non_ssa_index)?;
+            (place_ty, value) = self.project(place_ty, value, proj, &mut from_non_ssa_index)?;
         }
 
         if let Value::Projection(pointer, ProjectionElem::Deref) = *self.get(value)
@@ -796,7 +889,12 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             && let Some(v) = self.simplify_place_value(&mut pointee, location)
         {
             value = v;
-            place_ref = pointee.project_deeper(&[], self.tcx).as_ref();
+            // `pointee` holds a `Place`, so `ProjectionElem::Index` holds a `Local`.
+            // That local is SSA, but we otherwise have no guarantee on that local's value at
+            // the current location compared to its value where `pointee` was borrowed.
+            if pointee.projection.iter().all(|elem| !matches!(elem, ProjectionElem::Index(_))) {
+                place_ref = pointee.project_deeper(&[], self.tcx).as_ref();
+            }
         }
         if let Some(new_local) = self.try_as_local(value, location) {
             place_ref = PlaceRef { local: new_local, projection: &[] };
@@ -864,18 +962,12 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                 self.simplify_place_projection(place, location);
                 return Some(self.new_pointer(*place, AddressKind::Address(mutbl)));
             }
-            Rvalue::WrapUnsafeBinder(ref mut op, ty) => {
+            Rvalue::WrapUnsafeBinder(ref mut op, _) => {
                 let value = self.simplify_operand(op, location)?;
-                Value::Cast {
-                    kind: CastKind::Transmute,
-                    value,
-                    from: op.ty(self.local_decls, self.tcx),
-                    to: ty,
-                }
+                Value::Cast { kind: CastKind::Transmute, value }
             }
 
             // Operations.
-            Rvalue::Len(ref mut place) => return self.simplify_len(place, location),
             Rvalue::Cast(ref mut kind, ref mut value, to) => {
                 return self.simplify_cast(kind, value, to, location);
             }
@@ -896,18 +988,17 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             // Unsupported values.
             Rvalue::ThreadLocalRef(..) | Rvalue::ShallowInitBox(..) => return None,
         };
-        debug!(?value);
-        Some(self.insert(value))
+        let ty = rvalue.ty(self.local_decls, self.tcx);
+        Some(self.insert(ty, value))
     }
 
     fn simplify_discriminant(&mut self, place: VnIndex) -> Option<VnIndex> {
-        if let Value::Aggregate(enum_ty, variant, _) = *self.get(place)
-            && let AggregateTy::Def(enum_did, enum_args) = enum_ty
-            && let DefKind::Enum = self.tcx.def_kind(enum_did)
+        let enum_ty = self.ty(place);
+        if enum_ty.is_enum()
+            && let Value::Aggregate(variant, _) = *self.get(place)
         {
-            let enum_ty = self.tcx.type_of(enum_did).instantiate(self.tcx, enum_args);
             let discr = self.ecx.discriminant_for_variant(enum_ty, variant).discard_err()?;
-            return Some(self.insert_scalar(discr.to_scalar(), discr.layout.ty));
+            return Some(self.insert_scalar(discr.layout.ty, discr.to_scalar()));
         }
 
         None
@@ -915,46 +1006,29 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
 
     fn try_as_place_elem(
         &mut self,
-        proj: ProjectionElem<VnIndex, Ty<'tcx>>,
+        ty: Ty<'tcx>,
+        proj: ProjectionElem<VnIndex, ()>,
         loc: Location,
     ) -> Option<PlaceElem<'tcx>> {
-        Some(match proj {
-            ProjectionElem::Deref => ProjectionElem::Deref,
-            ProjectionElem::Field(idx, ty) => ProjectionElem::Field(idx, ty),
-            ProjectionElem::Index(idx) => {
-                let Some(local) = self.try_as_local(idx, loc) else {
-                    return None;
-                };
+        proj.try_map(
+            |value| {
+                let local = self.try_as_local(value, loc)?;
                 self.reused_locals.insert(local);
-                ProjectionElem::Index(local)
-            }
-            ProjectionElem::ConstantIndex { offset, min_length, from_end } => {
-                ProjectionElem::ConstantIndex { offset, min_length, from_end }
-            }
-            ProjectionElem::Subslice { from, to, from_end } => {
-                ProjectionElem::Subslice { from, to, from_end }
-            }
-            ProjectionElem::Downcast(symbol, idx) => ProjectionElem::Downcast(symbol, idx),
-            ProjectionElem::OpaqueCast(idx) => ProjectionElem::OpaqueCast(idx),
-            ProjectionElem::Subtype(idx) => ProjectionElem::Subtype(idx),
-            ProjectionElem::UnwrapUnsafeBinder(ty) => ProjectionElem::UnwrapUnsafeBinder(ty),
-        })
+                Some(local)
+            },
+            |()| ty,
+        )
     }
 
     fn simplify_aggregate_to_copy(
         &mut self,
-        lhs: &Place<'tcx>,
-        rvalue: &mut Rvalue<'tcx>,
-        location: Location,
-        fields: &[VnIndex],
+        ty: Ty<'tcx>,
         variant_index: VariantIdx,
+        fields: &[VnIndex],
     ) -> Option<VnIndex> {
-        let Some(&first_field) = fields.first() else {
-            return None;
-        };
-        let Value::Projection(copy_from_value, _) = *self.get(first_field) else {
-            return None;
-        };
+        let Some(&first_field) = fields.first() else { return None };
+        let Value::Projection(copy_from_value, _) = *self.get(first_field) else { return None };
+
         // All fields must correspond one-to-one and come from the same aggregate value.
         if fields.iter().enumerate().any(|(index, &v)| {
             if let Value::Projection(pointer, ProjectionElem::Field(from_index, _)) = *self.get(v)
@@ -981,21 +1055,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             }
         }
 
-        // Allow introducing places with non-constant offsets, as those are still better than
-        // reconstructing an aggregate.
-        if let Some(place) = self.try_as_place(copy_from_local_value, location, true)
-            && rvalue.ty(self.local_decls, self.tcx) == place.ty(self.local_decls, self.tcx).ty
-        {
-            // Avoid creating `*a = copy (*b)`, as they might be aliases resulting in overlapping assignments.
-            // FIXME: This also avoids any kind of projection, not just derefs. We can add allowed projections.
-            if lhs.as_local().is_some() {
-                self.reused_locals.insert(place.local);
-                *rvalue = Rvalue::Use(Operand::Copy(place));
-            }
-            return Some(copy_from_local_value);
-        }
-
-        None
+        // Both must be variants of the same type.
+        if self.ty(copy_from_local_value) == ty { Some(copy_from_local_value) } else { None }
     }
 
     fn simplify_aggregate(
@@ -1004,9 +1065,11 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
         rvalue: &mut Rvalue<'tcx>,
         location: Location,
     ) -> Option<VnIndex> {
+        let tcx = self.tcx;
+        let ty = rvalue.ty(self.local_decls, tcx);
+
         let Rvalue::Aggregate(box ref kind, ref mut field_ops) = *rvalue else { bug!() };
 
-        let tcx = self.tcx;
         if field_ops.is_empty() {
             let is_zst = match *kind {
                 AggregateKind::Array(..)
@@ -1021,87 +1084,79 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             };
 
             if is_zst {
-                let ty = rvalue.ty(self.local_decls, tcx);
                 return Some(self.insert_constant(Const::zero_sized(ty)));
             }
         }
 
-        let (mut ty, variant_index) = match *kind {
-            AggregateKind::Array(..) => {
-                assert!(!field_ops.is_empty());
-                (AggregateTy::Array, FIRST_VARIANT)
-            }
-            AggregateKind::Tuple => {
+        let fields: Vec<_> = field_ops
+            .iter_mut()
+            .map(|op| {
+                self.simplify_operand(op, location)
+                    .unwrap_or_else(|| self.new_opaque(op.ty(self.local_decls, self.tcx)))
+            })
+            .collect();
+
+        let variant_index = match *kind {
+            AggregateKind::Array(..) | AggregateKind::Tuple => {
                 assert!(!field_ops.is_empty());
-                (AggregateTy::Tuple, FIRST_VARIANT)
-            }
-            AggregateKind::Closure(did, args)
-            | AggregateKind::CoroutineClosure(did, args)
-            | AggregateKind::Coroutine(did, args) => (AggregateTy::Def(did, args), FIRST_VARIANT),
-            AggregateKind::Adt(did, variant_index, args, _, None) => {
-                (AggregateTy::Def(did, args), variant_index)
+                FIRST_VARIANT
             }
+            AggregateKind::Closure(..)
+            | AggregateKind::CoroutineClosure(..)
+            | AggregateKind::Coroutine(..) => FIRST_VARIANT,
+            AggregateKind::Adt(_, variant_index, _, _, None) => variant_index,
             // Do not track unions.
             AggregateKind::Adt(_, _, _, _, Some(_)) => return None,
-            AggregateKind::RawPtr(pointee_ty, mtbl) => {
+            AggregateKind::RawPtr(..) => {
                 assert_eq!(field_ops.len(), 2);
-                let data_pointer_ty = field_ops[FieldIdx::ZERO].ty(self.local_decls, self.tcx);
-                let output_pointer_ty = Ty::new_ptr(self.tcx, pointee_ty, mtbl);
-                (AggregateTy::RawPtr { data_pointer_ty, output_pointer_ty }, FIRST_VARIANT)
-            }
-        };
-
-        let mut fields: Vec<_> = field_ops
-            .iter_mut()
-            .map(|op| self.simplify_operand(op, location).unwrap_or_else(|| self.new_opaque()))
-            .collect();
-
-        if let AggregateTy::RawPtr { data_pointer_ty, output_pointer_ty } = &mut ty {
-            let mut was_updated = false;
+                let [mut pointer, metadata] = fields.try_into().unwrap();
+
+                // Any thin pointer of matching mutability is fine as the data pointer.
+                let mut was_updated = false;
+                while let Value::Cast { kind: CastKind::PtrToPtr, value: cast_value } =
+                    self.get(pointer)
+                    && let ty::RawPtr(from_pointee_ty, from_mtbl) = self.ty(*cast_value).kind()
+                    && let ty::RawPtr(_, output_mtbl) = ty.kind()
+                    && from_mtbl == output_mtbl
+                    && from_pointee_ty.is_sized(self.tcx, self.typing_env())
+                {
+                    pointer = *cast_value;
+                    was_updated = true;
+                }
 
-            // Any thin pointer of matching mutability is fine as the data pointer.
-            while let Value::Cast {
-                kind: CastKind::PtrToPtr,
-                value: cast_value,
-                from: cast_from,
-                to: _,
-            } = self.get(fields[0])
-                && let ty::RawPtr(from_pointee_ty, from_mtbl) = cast_from.kind()
-                && let ty::RawPtr(_, output_mtbl) = output_pointer_ty.kind()
-                && from_mtbl == output_mtbl
-                && from_pointee_ty.is_sized(self.tcx, self.typing_env())
-            {
-                fields[0] = *cast_value;
-                *data_pointer_ty = *cast_from;
-                was_updated = true;
-            }
+                if was_updated && let Some(op) = self.try_as_operand(pointer, location) {
+                    field_ops[FieldIdx::ZERO] = op;
+                }
 
-            if was_updated && let Some(op) = self.try_as_operand(fields[0], location) {
-                field_ops[FieldIdx::ZERO] = op;
+                return Some(self.insert(ty, Value::RawPtr { pointer, metadata }));
             }
-        }
+        };
 
-        if let AggregateTy::Array = ty
+        if ty.is_array()
             && fields.len() > 4
+            && let Ok(&first) = fields.iter().all_equal_value()
         {
-            let first = fields[0];
-            if fields.iter().all(|&v| v == first) {
-                let len = ty::Const::from_target_usize(self.tcx, fields.len().try_into().unwrap());
-                if let Some(op) = self.try_as_operand(first, location) {
-                    *rvalue = Rvalue::Repeat(op, len);
-                }
-                return Some(self.insert(Value::Repeat(first, len)));
+            let len = ty::Const::from_target_usize(self.tcx, fields.len().try_into().unwrap());
+            if let Some(op) = self.try_as_operand(first, location) {
+                *rvalue = Rvalue::Repeat(op, len);
             }
+            return Some(self.insert(ty, Value::Repeat(first, len)));
         }
 
-        if let AggregateTy::Def(_, _) = ty
-            && let Some(value) =
-                self.simplify_aggregate_to_copy(lhs, rvalue, location, &fields, variant_index)
-        {
+        if let Some(value) = self.simplify_aggregate_to_copy(ty, variant_index, &fields) {
+            // Allow introducing places with non-constant offsets, as those are still better than
+            // reconstructing an aggregate. But avoid creating `*a = copy (*b)`, as they might be
+            // aliases resulting in overlapping assignments.
+            let allow_complex_projection =
+                lhs.projection[..].iter().all(PlaceElem::is_stable_offset);
+            if let Some(place) = self.try_as_place(value, location, allow_complex_projection) {
+                self.reused_locals.insert(place.local);
+                *rvalue = Rvalue::Use(Operand::Copy(place));
+            }
             return Some(value);
         }
 
-        Some(self.insert(Value::Aggregate(ty, variant_index, fields)))
+        Some(self.insert(ty, Value::Aggregate(variant_index, fields)))
     }
 
     #[instrument(level = "trace", skip(self), ret)]
@@ -1112,13 +1167,15 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
         location: Location,
     ) -> Option<VnIndex> {
         let mut arg_index = self.simplify_operand(arg_op, location)?;
+        let arg_ty = self.ty(arg_index);
+        let ret_ty = op.ty(self.tcx, arg_ty);
 
         // PtrMetadata doesn't care about *const vs *mut vs & vs &mut,
         // so start by removing those distinctions so we can update the `Operand`
         if op == UnOp::PtrMetadata {
             let mut was_updated = false;
             loop {
-                match self.get(arg_index) {
+                arg_index = match self.get(arg_index) {
                     // Pointer casts that preserve metadata, such as
                     // `*const [i32]` <-> `*mut [i32]` <-> `*mut [f32]`.
                     // It's critical that this not eliminate cases like
@@ -1127,12 +1184,22 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                     // we can't always know exactly what the metadata are.
                     // To allow things like `*mut (?A, ?T)` <-> `*mut (?B, ?T)`,
                     // it's fine to get a projection as the type.
-                    Value::Cast { kind: CastKind::PtrToPtr, value: inner, from, to }
-                        if self.pointers_have_same_metadata(*from, *to) =>
+                    Value::Cast { kind: CastKind::PtrToPtr, value: inner }
+                        if self.pointers_have_same_metadata(self.ty(*inner), arg_ty) =>
+                    {
+                        *inner
+                    }
+
+                    // We have an unsizing cast, which assigns the length to wide pointer metadata.
+                    Value::Cast {
+                        kind: CastKind::PointerCoercion(ty::adjustment::PointerCoercion::Unsize, _),
+                        value: from,
+                    } if let Some(from) = self.ty(*from).builtin_deref(true)
+                        && let ty::Array(_, len) = from.kind()
+                        && let Some(to) = self.ty(arg_index).builtin_deref(true)
+                        && let ty::Slice(..) = to.kind() =>
                     {
-                        arg_index = *inner;
-                        was_updated = true;
-                        continue;
+                        return Some(self.insert_constant(Const::Ty(self.tcx.types.usize, *len)));
                     }
 
                     // `&mut *p`, `&raw *p`, etc don't change metadata.
@@ -1141,18 +1208,16 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                             place.as_ref()
                             && let Some(local_index) = self.locals[local] =>
                     {
-                        arg_index = local_index;
-                        was_updated = true;
-                        continue;
+                        local_index
                     }
 
-                    _ => {
-                        if was_updated && let Some(op) = self.try_as_operand(arg_index, location) {
-                            *arg_op = op;
-                        }
-                        break;
-                    }
-                }
+                    _ => break,
+                };
+                was_updated = true;
+            }
+
+            if was_updated && let Some(op) = self.try_as_operand(arg_index, location) {
+                *arg_op = op;
             }
         }
 
@@ -1165,26 +1230,22 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             (UnOp::Not, Value::BinaryOp(BinOp::Ne, lhs, rhs)) => {
                 Value::BinaryOp(BinOp::Eq, *lhs, *rhs)
             }
-            (UnOp::PtrMetadata, Value::Aggregate(AggregateTy::RawPtr { .. }, _, fields)) => {
-                return Some(fields[1]);
-            }
+            (UnOp::PtrMetadata, Value::RawPtr { metadata, .. }) => return Some(*metadata),
             // We have an unsizing cast, which assigns the length to wide pointer metadata.
             (
                 UnOp::PtrMetadata,
                 Value::Cast {
                     kind: CastKind::PointerCoercion(ty::adjustment::PointerCoercion::Unsize, _),
-                    from,
-                    to,
-                    ..
+                    value: inner,
                 },
-            ) if let ty::Slice(..) = to.builtin_deref(true).unwrap().kind()
-                && let ty::Array(_, len) = from.builtin_deref(true).unwrap().kind() =>
+            ) if let ty::Slice(..) = arg_ty.builtin_deref(true).unwrap().kind()
+                && let ty::Array(_, len) = self.ty(*inner).builtin_deref(true).unwrap().kind() =>
             {
                 return Some(self.insert_constant(Const::Ty(self.tcx.types.usize, *len)));
             }
             _ => Value::UnaryOp(op, arg_index),
         };
-        Some(self.insert(value))
+        Some(self.insert(ret_ty, value))
     }
 
     #[instrument(level = "trace", skip(self), ret)]
@@ -1197,25 +1258,23 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
     ) -> Option<VnIndex> {
         let lhs = self.simplify_operand(lhs_operand, location);
         let rhs = self.simplify_operand(rhs_operand, location);
+
         // Only short-circuit options after we called `simplify_operand`
         // on both operands for side effect.
         let mut lhs = lhs?;
         let mut rhs = rhs?;
 
-        let lhs_ty = lhs_operand.ty(self.local_decls, self.tcx);
+        let lhs_ty = self.ty(lhs);
 
         // If we're comparing pointers, remove `PtrToPtr` casts if the from
         // types of both casts and the metadata all match.
         if let BinOp::Eq | BinOp::Ne | BinOp::Lt | BinOp::Le | BinOp::Gt | BinOp::Ge = op
             && lhs_ty.is_any_ptr()
-            && let Value::Cast {
-                kind: CastKind::PtrToPtr, value: lhs_value, from: lhs_from, ..
-            } = self.get(lhs)
-            && let Value::Cast {
-                kind: CastKind::PtrToPtr, value: rhs_value, from: rhs_from, ..
-            } = self.get(rhs)
-            && lhs_from == rhs_from
-            && self.pointers_have_same_metadata(*lhs_from, lhs_ty)
+            && let Value::Cast { kind: CastKind::PtrToPtr, value: lhs_value } = self.get(lhs)
+            && let Value::Cast { kind: CastKind::PtrToPtr, value: rhs_value } = self.get(rhs)
+            && let lhs_from = self.ty(*lhs_value)
+            && lhs_from == self.ty(*rhs_value)
+            && self.pointers_have_same_metadata(lhs_from, lhs_ty)
         {
             lhs = *lhs_value;
             rhs = *rhs_value;
@@ -1230,8 +1289,9 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
         if let Some(value) = self.simplify_binary_inner(op, lhs_ty, lhs, rhs) {
             return Some(value);
         }
+        let ty = op.ty(self.tcx, lhs_ty, self.ty(rhs));
         let value = Value::BinaryOp(op, lhs, rhs);
-        Some(self.insert(value))
+        Some(self.insert(ty, value))
     }
 
     fn simplify_binary_inner(
@@ -1323,19 +1383,19 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                 | BinOp::Shr,
                 Left(0),
                 _,
-            ) => self.insert_scalar(Scalar::from_uint(0u128, layout.size), lhs_ty),
+            ) => self.insert_scalar(lhs_ty, Scalar::from_uint(0u128, layout.size)),
             // Attempt to simplify `x | ALL_ONES` to `ALL_ONES`.
             (BinOp::BitOr, _, Left(ones)) | (BinOp::BitOr, Left(ones), _)
                 if ones == layout.size.truncate(u128::MAX)
                     || (layout.ty.is_bool() && ones == 1) =>
             {
-                self.insert_scalar(Scalar::from_uint(ones, layout.size), lhs_ty)
+                self.insert_scalar(lhs_ty, Scalar::from_uint(ones, layout.size))
             }
             // Sub/Xor with itself.
             (BinOp::Sub | BinOp::SubWithOverflow | BinOp::SubUnchecked | BinOp::BitXor, a, b)
                 if a == b =>
             {
-                self.insert_scalar(Scalar::from_uint(0u128, layout.size), lhs_ty)
+                self.insert_scalar(lhs_ty, Scalar::from_uint(0u128, layout.size))
             }
             // Comparison:
             // - if both operands can be computed as bits, just compare the bits;
@@ -1349,8 +1409,9 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
         };
 
         if op.is_overflowing() {
+            let ty = Ty::new_tup(self.tcx, &[self.ty(result), self.tcx.types.bool]);
             let false_val = self.insert_bool(false);
-            Some(self.insert_tuple(vec![result, false_val]))
+            Some(self.insert_tuple(ty, vec![result, false_val]))
         } else {
             Some(result)
         }
@@ -1366,9 +1427,9 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
         use CastKind::*;
         use rustc_middle::ty::adjustment::PointerCoercion::*;
 
-        let mut from = initial_operand.ty(self.local_decls, self.tcx);
         let mut kind = *initial_kind;
         let mut value = self.simplify_operand(initial_operand, location)?;
+        let mut from = self.ty(value);
         if from == to {
             return Some(value);
         }
@@ -1376,7 +1437,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
         if let CastKind::PointerCoercion(ReifyFnPointer | ClosureFnPointer(_), _) = kind {
             // Each reification of a generic fn may get a different pointer.
             // Do not try to merge them.
-            return Some(self.new_opaque());
+            return Some(self.new_opaque(to));
         }
 
         let mut was_ever_updated = false;
@@ -1399,23 +1460,22 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             // If a cast just casts away the metadata again, then we can get it by
             // casting the original thin pointer passed to `from_raw_parts`
             if let PtrToPtr = kind
-                && let Value::Aggregate(AggregateTy::RawPtr { data_pointer_ty, .. }, _, fields) =
-                    self.get(value)
+                && let Value::RawPtr { pointer, .. } = self.get(value)
                 && let ty::RawPtr(to_pointee, _) = to.kind()
                 && to_pointee.is_sized(self.tcx, self.typing_env())
             {
-                from = *data_pointer_ty;
-                value = fields[0];
+                from = self.ty(*pointer);
+                value = *pointer;
                 was_updated_this_iteration = true;
-                if *data_pointer_ty == to {
-                    return Some(fields[0]);
+                if from == to {
+                    return Some(*pointer);
                 }
             }
 
             // Aggregate-then-Transmute can just transmute the original field value,
             // so long as the bytes of a value from only from a single field.
             if let Transmute = kind
-                && let Value::Aggregate(_aggregate_ty, variant_idx, field_values) = self.get(value)
+                && let Value::Aggregate(variant_idx, field_values) = self.get(value)
                 && let Some((field_idx, field_ty)) =
                     self.value_is_all_in_one_field(from, *variant_idx)
             {
@@ -1428,13 +1488,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             }
 
             // Various cast-then-cast cases can be simplified.
-            if let Value::Cast {
-                kind: inner_kind,
-                value: inner_value,
-                from: inner_from,
-                to: inner_to,
-            } = *self.get(value)
-            {
+            if let Value::Cast { kind: inner_kind, value: inner_value } = *self.get(value) {
+                let inner_from = self.ty(inner_value);
                 let new_kind = match (inner_kind, kind) {
                     // Even if there's a narrowing cast in here that's fine, because
                     // things like `*mut [i32] -> *mut i32 -> *const i32` and
@@ -1443,9 +1498,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                     // PtrToPtr-then-Transmute is fine so long as the pointer cast is identity:
                     // `*const T -> *mut T -> NonNull<T>` is fine, but we need to check for narrowing
                     // to skip things like `*const [i32] -> *const i32 -> NonNull<T>`.
-                    (PtrToPtr, Transmute)
-                        if self.pointers_have_same_metadata(inner_from, inner_to) =>
-                    {
+                    (PtrToPtr, Transmute) if self.pointers_have_same_metadata(inner_from, from) => {
                         Some(Transmute)
                     }
                     // Similarly, for Transmute-then-PtrToPtr. Note that we need to check different
@@ -1456,7 +1509,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
                     // If would be legal to always do this, but we don't want to hide information
                     // from the backend that it'd otherwise be able to use for optimizations.
                     (Transmute, Transmute)
-                        if !self.type_may_have_niche_of_interest_to_backend(inner_to) =>
+                        if !self.type_may_have_niche_of_interest_to_backend(from) =>
                     {
                         Some(Transmute)
                     }
@@ -1485,40 +1538,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
             *initial_kind = kind;
         }
 
-        Some(self.insert(Value::Cast { kind, value, from, to }))
-    }
-
-    fn simplify_len(&mut self, place: &mut Place<'tcx>, location: Location) -> Option<VnIndex> {
-        // Trivial case: we are fetching a statically known length.
-        let place_ty = place.ty(self.local_decls, self.tcx).ty;
-        if let ty::Array(_, len) = place_ty.kind() {
-            return Some(self.insert_constant(Const::Ty(self.tcx.types.usize, *len)));
-        }
-
-        let mut inner = self.simplify_place_value(place, location)?;
-
-        // The length information is stored in the wide pointer.
-        // Reborrowing copies length information from one pointer to the other.
-        while let Value::Address { place: borrowed, .. } = self.get(inner)
-            && let [PlaceElem::Deref] = borrowed.projection[..]
-            && let Some(borrowed) = self.locals[borrowed.local]
-        {
-            inner = borrowed;
-        }
-
-        // We have an unsizing cast, which assigns the length to wide pointer metadata.
-        if let Value::Cast { kind, from, to, .. } = self.get(inner)
-            && let CastKind::PointerCoercion(ty::adjustment::PointerCoercion::Unsize, _) = kind
-            && let Some(from) = from.builtin_deref(true)
-            && let ty::Array(_, len) = from.kind()
-            && let Some(to) = to.builtin_deref(true)
-            && let ty::Slice(..) = to.kind()
-        {
-            return Some(self.insert_constant(Const::Ty(self.tcx.types.usize, *len)));
-        }
-
-        // Fallback: a symbolic `Len`.
-        Some(self.insert(Value::Len(inner)))
+        Some(self.insert(to, Value::Cast { kind, value }))
     }
 
     fn pointers_have_same_metadata(&self, left_ptr_ty: Ty<'tcx>, right_ptr_ty: Ty<'tcx>) -> bool {
@@ -1592,7 +1612,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
 fn op_to_prop_const<'tcx>(
     ecx: &mut InterpCx<'tcx, DummyMachine>,
     op: &OpTy<'tcx>,
-) -> Option<ConstValue<'tcx>> {
+) -> Option<ConstValue> {
     // Do not attempt to propagate unsized locals.
     if op.layout.is_unsized() {
         return None;
@@ -1686,7 +1706,7 @@ impl<'tcx> VnState<'_, 'tcx> {
         // This was already constant in MIR, do not change it. If the constant is not
         // deterministic, adding an additional mention of it in MIR will not give the same value as
         // the former mention.
-        if let Value::Constant { value, disambiguator: 0 } = *self.get(index) {
+        if let Value::Constant { value, disambiguator: None } = *self.get(index) {
             debug_assert!(value.is_deterministic());
             return Some(ConstOperand { span: DUMMY_SP, user_ty: None, const_: value });
         }
@@ -1725,9 +1745,14 @@ impl<'tcx> VnState<'_, 'tcx> {
                 let place =
                     Place { local, projection: self.tcx.mk_place_elems(projection.as_slice()) };
                 return Some(place);
+            } else if projection.last() == Some(&PlaceElem::Deref) {
+                // `Deref` can only be the first projection in a place.
+                // If we are here, we failed to find a local, and we already have a `Deref`.
+                // Trying to add projections will only result in an ill-formed place.
+                return None;
             } else if let Value::Projection(pointer, proj) = *self.get(index)
                 && (allow_complex_projection || proj.is_stable_offset())
-                && let Some(proj) = self.try_as_place_elem(proj, loc)
+                && let Some(proj) = self.try_as_place_elem(self.ty(index), proj, loc)
             {
                 projection.push(proj);
                 index = pointer;
@@ -1755,7 +1780,7 @@ impl<'tcx> MutVisitor<'tcx> for VnState<'_, 'tcx> {
 
     fn visit_place(&mut self, place: &mut Place<'tcx>, context: PlaceContext, location: Location) {
         self.simplify_place_projection(place, location);
-        if context.is_mutating_use() && !place.projection.is_empty() {
+        if context.is_mutating_use() && place.is_indirect() {
             // Non-local mutation maybe invalidate deref.
             self.invalidate_derefs();
         }
@@ -1767,36 +1792,42 @@ impl<'tcx> MutVisitor<'tcx> for VnState<'_, 'tcx> {
         self.super_operand(operand, location);
     }
 
-    fn visit_statement(&mut self, stmt: &mut Statement<'tcx>, location: Location) {
-        if let StatementKind::Assign(box (ref mut lhs, ref mut rvalue)) = stmt.kind {
-            self.simplify_place_projection(lhs, location);
-
-            let value = self.simplify_rvalue(lhs, rvalue, location);
-            let value = if let Some(local) = lhs.as_local()
-                && self.ssa.is_ssa(local)
-                // FIXME(#112651) `rvalue` may have a subtype to `local`. We can only mark
-                // `local` as reusable if we have an exact type match.
-                && self.local_decls[local].ty == rvalue.ty(self.local_decls, self.tcx)
+    fn visit_assign(
+        &mut self,
+        lhs: &mut Place<'tcx>,
+        rvalue: &mut Rvalue<'tcx>,
+        location: Location,
+    ) {
+        self.simplify_place_projection(lhs, location);
+
+        let value = self.simplify_rvalue(lhs, rvalue, location);
+        if let Some(value) = value {
+            if let Some(const_) = self.try_as_constant(value) {
+                *rvalue = Rvalue::Use(Operand::Constant(Box::new(const_)));
+            } else if let Some(place) = self.try_as_place(value, location, false)
+                && *rvalue != Rvalue::Use(Operand::Move(place))
+                && *rvalue != Rvalue::Use(Operand::Copy(place))
             {
-                let value = value.unwrap_or_else(|| self.new_opaque());
-                self.assign(local, value);
-                Some(value)
-            } else {
-                value
-            };
-            if let Some(value) = value {
-                if let Some(const_) = self.try_as_constant(value) {
-                    *rvalue = Rvalue::Use(Operand::Constant(Box::new(const_)));
-                } else if let Some(place) = self.try_as_place(value, location, false)
-                    && *rvalue != Rvalue::Use(Operand::Move(place))
-                    && *rvalue != Rvalue::Use(Operand::Copy(place))
-                {
-                    *rvalue = Rvalue::Use(Operand::Copy(place));
-                    self.reused_locals.insert(place.local);
-                }
+                *rvalue = Rvalue::Use(Operand::Copy(place));
+                self.reused_locals.insert(place.local);
             }
         }
-        self.super_statement(stmt, location);
+
+        if lhs.is_indirect() {
+            // Non-local mutation maybe invalidate deref.
+            self.invalidate_derefs();
+        }
+
+        if let Some(local) = lhs.as_local()
+            && self.ssa.is_ssa(local)
+            && let rvalue_ty = rvalue.ty(self.local_decls, self.tcx)
+            // FIXME(#112651) `rvalue` may have a subtype to `local`. We can only mark
+            // `local` as reusable if we have an exact type match.
+            && self.local_decls[local].ty == rvalue_ty
+        {
+            let value = value.unwrap_or_else(|| self.new_opaque(rvalue_ty));
+            self.assign(local, value);
+        }
     }
 
     fn visit_terminator(&mut self, terminator: &mut Terminator<'tcx>, location: Location) {
@@ -1804,7 +1835,8 @@ impl<'tcx> MutVisitor<'tcx> for VnState<'_, 'tcx> {
             if let Some(local) = destination.as_local()
                 && self.ssa.is_ssa(local)
             {
-                let opaque = self.new_opaque();
+                let ty = self.local_decls[local].ty;
+                let opaque = self.new_opaque(ty);
                 self.assign(local, opaque);
             }
         }
diff --git a/compiler/rustc_mir_transform/src/impossible_predicates.rs b/compiler/rustc_mir_transform/src/impossible_predicates.rs
index 86e2bf6cb3c..b03518de00a 100644
--- a/compiler/rustc_mir_transform/src/impossible_predicates.rs
+++ b/compiler/rustc_mir_transform/src/impossible_predicates.rs
@@ -27,7 +27,7 @@
 //! it's usually never invoked in this way.
 
 use rustc_middle::mir::{Body, START_BLOCK, TerminatorKind};
-use rustc_middle::ty::{TyCtxt, TypeVisitableExt};
+use rustc_middle::ty::{TyCtxt, TypeFlags, TypeVisitableExt};
 use rustc_trait_selection::traits;
 use tracing::trace;
 
@@ -36,14 +36,23 @@ use crate::pass_manager::MirPass;
 pub(crate) struct ImpossiblePredicates;
 
 impl<'tcx> MirPass<'tcx> for ImpossiblePredicates {
+    #[tracing::instrument(level = "trace", skip(self, tcx, body))]
     fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
-        let predicates = tcx
-            .predicates_of(body.source.def_id())
-            .predicates
-            .iter()
-            .filter_map(|(p, _)| if p.is_global() { Some(*p) } else { None });
-        if traits::impossible_predicates(tcx, traits::elaborate(tcx, predicates).collect()) {
-            trace!("found unsatisfiable predicates for {:?}", body.source);
+        tracing::trace!(def_id = ?body.source.def_id());
+        let predicates = tcx.predicates_of(body.source.def_id()).instantiate_identity(tcx);
+        tracing::trace!(?predicates);
+        let predicates = predicates.predicates.into_iter().filter(|p| {
+            !p.has_type_flags(
+                // Only consider global clauses to simplify.
+                TypeFlags::HAS_FREE_LOCAL_NAMES
+                // Clauses that refer to unevaluated constants as they cause cycles.
+                | TypeFlags::HAS_CT_PROJECTION,
+            )
+        });
+        let predicates: Vec<_> = traits::elaborate(tcx, predicates).collect();
+        tracing::trace!(?predicates);
+        if predicates.references_error() || traits::impossible_predicates(tcx, predicates) {
+            trace!("found unsatisfiable predicates");
             // Clear the body to only contain a single `unreachable` statement.
             let bbs = body.basic_blocks.as_mut();
             bbs.raw.truncate(1);
diff --git a/compiler/rustc_mir_transform/src/inline.rs b/compiler/rustc_mir_transform/src/inline.rs
index 1c0fc774867..3d49eb4e8ef 100644
--- a/compiler/rustc_mir_transform/src/inline.rs
+++ b/compiler/rustc_mir_transform/src/inline.rs
@@ -5,7 +5,7 @@ use std::iter;
 use std::ops::{Range, RangeFrom};
 
 use rustc_abi::{ExternAbi, FieldIdx};
-use rustc_attr_data_structures::{InlineAttr, OptimizeAttr};
+use rustc_hir::attrs::{InlineAttr, OptimizeAttr};
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::DefId;
 use rustc_index::Idx;
diff --git a/compiler/rustc_mir_transform/src/inline/cycle.rs b/compiler/rustc_mir_transform/src/inline/cycle.rs
index 93a81f0dca5..25a9baffe58 100644
--- a/compiler/rustc_mir_transform/src/inline/cycle.rs
+++ b/compiler/rustc_mir_transform/src/inline/cycle.rs
@@ -2,9 +2,9 @@ use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet};
 use rustc_data_structures::stack::ensure_sufficient_stack;
 use rustc_data_structures::unord::UnordSet;
 use rustc_hir::def_id::{DefId, LocalDefId};
+use rustc_hir::limit::Limit;
 use rustc_middle::mir::TerminatorKind;
 use rustc_middle::ty::{self, GenericArgsRef, InstanceKind, TyCtxt, TypeVisitableExt};
-use rustc_session::Limit;
 use rustc_span::sym;
 use tracing::{instrument, trace};
 
@@ -64,15 +64,15 @@ fn process<'tcx>(
     typing_env: ty::TypingEnv<'tcx>,
     caller: ty::Instance<'tcx>,
     target: LocalDefId,
-    seen: &mut FxHashSet<ty::Instance<'tcx>>,
+    seen: &mut FxHashMap<ty::Instance<'tcx>, bool>,
     involved: &mut FxHashSet<LocalDefId>,
     recursion_limiter: &mut FxHashMap<DefId, usize>,
     recursion_limit: Limit,
 ) -> bool {
     trace!(%caller);
-    let mut cycle_found = false;
+    let mut reaches_root = false;
 
-    for &(callee, args) in tcx.mir_inliner_callees(caller.def) {
+    for &(callee_def_id, args) in tcx.mir_inliner_callees(caller.def) {
         let Ok(args) = caller.try_instantiate_mir_and_normalize_erasing_regions(
             tcx,
             typing_env,
@@ -81,14 +81,17 @@ fn process<'tcx>(
             trace!(?caller, ?typing_env, ?args, "cannot normalize, skipping");
             continue;
         };
-        let Ok(Some(callee)) = ty::Instance::try_resolve(tcx, typing_env, callee, args) else {
-            trace!(?callee, "cannot resolve, skipping");
+        let Ok(Some(callee)) = ty::Instance::try_resolve(tcx, typing_env, callee_def_id, args)
+        else {
+            trace!(?callee_def_id, "cannot resolve, skipping");
             continue;
         };
 
         // Found a path.
         if callee.def_id() == target.to_def_id() {
-            cycle_found = true;
+            reaches_root = true;
+            seen.insert(callee, true);
+            continue;
         }
 
         if tcx.is_constructor(callee.def_id()) {
@@ -101,10 +104,17 @@ fn process<'tcx>(
             continue;
         }
 
-        if seen.insert(callee) {
+        let callee_reaches_root = if let Some(&c) = seen.get(&callee) {
+            // Even if we have seen this callee before, and thus don't need
+            // to recurse into it, we still need to propagate whether it reaches
+            // the root so that we can mark all the involved callers, in case we
+            // end up reaching that same recursive callee through some *other* cycle.
+            c
+        } else {
+            seen.insert(callee, false);
             let recursion = recursion_limiter.entry(callee.def_id()).or_default();
             trace!(?callee, recursion = *recursion);
-            let found_recursion = if recursion_limit.value_within_limit(*recursion) {
+            let callee_reaches_root = if recursion_limit.value_within_limit(*recursion) {
                 *recursion += 1;
                 ensure_sufficient_stack(|| {
                     process(
@@ -122,17 +132,19 @@ fn process<'tcx>(
                 // Pessimistically assume that there could be recursion.
                 true
             };
-            if found_recursion {
-                if let Some(callee) = callee.def_id().as_local() {
-                    // Calling `optimized_mir` of a non-local definition cannot cycle.
-                    involved.insert(callee);
-                }
-                cycle_found = true;
+            seen.insert(callee, callee_reaches_root);
+            callee_reaches_root
+        };
+        if callee_reaches_root {
+            if let Some(callee_def_id) = callee.def_id().as_local() {
+                // Calling `optimized_mir` of a non-local definition cannot cycle.
+                involved.insert(callee_def_id);
             }
+            reaches_root = true;
         }
     }
 
-    cycle_found
+    reaches_root
 }
 
 #[instrument(level = "debug", skip(tcx), ret)]
@@ -166,7 +178,7 @@ pub(crate) fn mir_callgraph_cyclic<'tcx>(
         typing_env,
         root_instance,
         root,
-        &mut FxHashSet::default(),
+        &mut FxHashMap::default(),
         &mut involved,
         &mut FxHashMap::default(),
         recursion_limit,
diff --git a/compiler/rustc_mir_transform/src/instsimplify.rs b/compiler/rustc_mir_transform/src/instsimplify.rs
index dbcaed20953..c83bd25c663 100644
--- a/compiler/rustc_mir_transform/src/instsimplify.rs
+++ b/compiler/rustc_mir_transform/src/instsimplify.rs
@@ -55,6 +55,7 @@ impl<'tcx> crate::MirPass<'tcx> for InstSimplify {
 
             let terminator = block.terminator.as_mut().unwrap();
             ctx.simplify_primitive_clone(terminator, &mut block.statements);
+            ctx.simplify_align_of_slice_val(terminator, &mut block.statements);
             ctx.simplify_intrinsic_assert(terminator);
             ctx.simplify_nounwind_call(terminator);
             simplify_duplicate_switch_targets(terminator);
@@ -252,6 +253,36 @@ impl<'tcx> InstSimplifyContext<'_, 'tcx> {
         terminator.kind = TerminatorKind::Goto { target: *destination_block };
     }
 
+    // Convert `align_of_val::<[T]>(ptr)` to `align_of::<T>()`, since the
+    // alignment of a slice doesn't actually depend on metadata at all
+    // and the element type is always `Sized`.
+    //
+    // This is here so it can run after inlining, where it's more useful.
+    // (LowerIntrinsics is done in cleanup, before the optimization passes.)
+    fn simplify_align_of_slice_val(
+        &self,
+        terminator: &mut Terminator<'tcx>,
+        statements: &mut Vec<Statement<'tcx>>,
+    ) {
+        if let TerminatorKind::Call {
+            func, args, destination, target: Some(destination_block), ..
+        } = &terminator.kind
+            && args.len() == 1
+            && let Some((fn_def_id, generics)) = func.const_fn_def()
+            && self.tcx.is_intrinsic(fn_def_id, sym::align_of_val)
+            && let ty::Slice(elem_ty) = *generics.type_at(0).kind()
+        {
+            statements.push(Statement::new(
+                terminator.source_info,
+                StatementKind::Assign(Box::new((
+                    *destination,
+                    Rvalue::NullaryOp(NullOp::AlignOf, elem_ty),
+                ))),
+            ));
+            terminator.kind = TerminatorKind::Goto { target: *destination_block };
+        }
+    }
+
     fn simplify_nounwind_call(&self, terminator: &mut Terminator<'tcx>) {
         let TerminatorKind::Call { ref func, ref mut unwind, .. } = terminator.kind else {
             return;
diff --git a/compiler/rustc_mir_transform/src/known_panics_lint.rs b/compiler/rustc_mir_transform/src/known_panics_lint.rs
index 481c7941909..aaacc5866a2 100644
--- a/compiler/rustc_mir_transform/src/known_panics_lint.rs
+++ b/compiler/rustc_mir_transform/src/known_panics_lint.rs
@@ -441,7 +441,6 @@ impl<'mir, 'tcx> ConstPropagator<'mir, 'tcx> {
             | Rvalue::Use(..)
             | Rvalue::CopyForDeref(..)
             | Rvalue::Repeat(..)
-            | Rvalue::Len(..)
             | Rvalue::Cast(..)
             | Rvalue::ShallowInitBox(..)
             | Rvalue::Discriminant(..)
@@ -604,20 +603,6 @@ impl<'mir, 'tcx> ConstPropagator<'mir, 'tcx> {
                 return None;
             }
 
-            Len(place) => {
-                let len = if let ty::Array(_, n) = place.ty(self.local_decls(), self.tcx).ty.kind()
-                {
-                    n.try_to_target_usize(self.tcx)?
-                } else {
-                    match self.get_const(place)? {
-                        Value::Immediate(src) => src.len(&self.ecx).discard_err()?,
-                        Value::Aggregate { fields, .. } => fields.len() as u64,
-                        Value::Uninit => return None,
-                    }
-                };
-                ImmTy::from_scalar(Scalar::from_target_usize(len, self), layout).into()
-            }
-
             Ref(..) | RawPtr(..) => return None,
 
             NullaryOp(ref null_op, ty) => {
diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs
index 08f25276cec..9ff7e0b5500 100644
--- a/compiler/rustc_mir_transform/src/lib.rs
+++ b/compiler/rustc_mir_transform/src/lib.rs
@@ -5,6 +5,7 @@
 #![feature(const_type_name)]
 #![feature(cow_is_borrowed)]
 #![feature(file_buffered)]
+#![feature(gen_blocks)]
 #![feature(if_let_guard)]
 #![feature(impl_trait_in_assoc_type)]
 #![feature(try_blocks)]
@@ -116,6 +117,7 @@ declare_passes! {
     mod add_subtyping_projections : Subtyper;
     mod check_inline : CheckForceInline;
     mod check_call_recursion : CheckCallRecursion, CheckDropRecursion;
+    mod check_inline_always_target_features: CheckInlineAlwaysTargetFeature;
     mod check_alignment : CheckAlignment;
     mod check_enums : CheckEnums;
     mod check_const_item_mutation : CheckConstItemMutation;
@@ -154,7 +156,6 @@ declare_passes! {
     mod match_branches : MatchBranchSimplification;
     mod mentioned_items : MentionedItems;
     mod multiple_return_terminators : MultipleReturnTerminators;
-    mod nrvo : RenameReturnPlace;
     mod post_drop_elaboration : CheckLiveDrops;
     mod prettify : ReorderBasicBlocks, ReorderLocals;
     mod promote_consts : PromoteTemps;
@@ -383,6 +384,9 @@ fn mir_built(tcx: TyCtxt<'_>, def: LocalDefId) -> &Steal<Body<'_>> {
             // MIR-level lints.
             &Lint(check_inline::CheckForceInline),
             &Lint(check_call_recursion::CheckCallRecursion),
+            // Check callee's target features match callers target features when
+            // using `#[inline(always)]`
+            &Lint(check_inline_always_target_features::CheckInlineAlwaysTargetFeature),
             &Lint(check_packed_ref::CheckPackedRef),
             &Lint(check_const_item_mutation::CheckConstItemMutation),
             &Lint(function_item_references::FunctionItemReferences),
@@ -710,7 +714,6 @@ pub(crate) fn run_optimization_passes<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'
             &jump_threading::JumpThreading,
             &early_otherwise_branch::EarlyOtherwiseBranch,
             &simplify_comparison_integral::SimplifyComparisonIntegral,
-            &dest_prop::DestinationPropagation,
             &o1(simplify_branches::SimplifyConstCondition::Final),
             &o1(remove_noop_landing_pads::RemoveNoopLandingPads),
             &o1(simplify::SimplifyCfg::Final),
@@ -718,7 +721,7 @@ pub(crate) fn run_optimization_passes<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'
             &strip_debuginfo::StripDebugInfo,
             &copy_prop::CopyProp,
             &dead_store_elimination::DeadStoreElimination::Final,
-            &nrvo::RenameReturnPlace,
+            &dest_prop::DestinationPropagation,
             &simplify::SimplifyLocals::Final,
             &multiple_return_terminators::MultipleReturnTerminators,
             &large_enums::EnumSizeOpt { discrepancy: 128 },
diff --git a/compiler/rustc_mir_transform/src/lint.rs b/compiler/rustc_mir_transform/src/lint.rs
index f472c7cb493..2ab49645dc4 100644
--- a/compiler/rustc_mir_transform/src/lint.rs
+++ b/compiler/rustc_mir_transform/src/lint.rs
@@ -6,7 +6,7 @@ use std::borrow::Cow;
 
 use rustc_data_structures::fx::FxHashSet;
 use rustc_index::bit_set::DenseBitSet;
-use rustc_middle::mir::visit::{PlaceContext, Visitor};
+use rustc_middle::mir::visit::{PlaceContext, VisitPlacesWith, Visitor};
 use rustc_middle::mir::*;
 use rustc_middle::ty::TyCtxt;
 use rustc_mir_dataflow::impls::{MaybeStorageDead, MaybeStorageLive, always_storage_live_locals};
@@ -79,15 +79,39 @@ impl<'a, 'tcx> Visitor<'tcx> for Lint<'a, 'tcx> {
     fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) {
         match &statement.kind {
             StatementKind::Assign(box (dest, rvalue)) => {
-                if let Rvalue::Use(Operand::Copy(src) | Operand::Move(src)) = rvalue {
-                    // The sides of an assignment must not alias. Currently this just checks whether
-                    // the places are identical.
-                    if dest == src {
-                        self.fail(
-                            location,
-                            "encountered `Assign` statement with overlapping memory",
-                        );
-                    }
+                let forbid_aliasing = match rvalue {
+                    Rvalue::Use(..)
+                    | Rvalue::CopyForDeref(..)
+                    | Rvalue::Repeat(..)
+                    | Rvalue::Aggregate(..)
+                    | Rvalue::Cast(..)
+                    | Rvalue::ShallowInitBox(..)
+                    | Rvalue::WrapUnsafeBinder(..) => true,
+                    Rvalue::ThreadLocalRef(..)
+                    | Rvalue::NullaryOp(..)
+                    | Rvalue::UnaryOp(..)
+                    | Rvalue::BinaryOp(..)
+                    | Rvalue::Ref(..)
+                    | Rvalue::RawPtr(..)
+                    | Rvalue::Discriminant(..) => false,
+                };
+                // The sides of an assignment must not alias.
+                if forbid_aliasing {
+                    VisitPlacesWith(|src: Place<'tcx>, _| {
+                        if *dest == src
+                            || (dest.local == src.local
+                                && !dest.is_indirect()
+                                && !src.is_indirect())
+                        {
+                            self.fail(
+                                location,
+                                format!(
+                                    "encountered `{statement:?}` statement with overlapping memory"
+                                ),
+                            );
+                        }
+                    })
+                    .visit_rvalue(rvalue, location);
                 }
             }
             StatementKind::StorageLive(local) => {
diff --git a/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs b/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs
index 2f0edf31162..61c9bbe3123 100644
--- a/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs
+++ b/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs
@@ -2,6 +2,7 @@ use std::cell::RefCell;
 use std::collections::hash_map;
 use std::rc::Rc;
 
+use itertools::Itertools as _;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexMap};
 use rustc_data_structures::unord::{UnordMap, UnordSet};
 use rustc_errors::Subdiagnostic;
@@ -12,8 +13,8 @@ use rustc_index::{IndexSlice, IndexVec};
 use rustc_macros::{LintDiagnostic, Subdiagnostic};
 use rustc_middle::bug;
 use rustc_middle::mir::{
-    self, BasicBlock, Body, ClearCrossCrate, Local, Location, Place, StatementKind, TerminatorKind,
-    dump_mir,
+    self, BasicBlock, Body, ClearCrossCrate, Local, Location, MirDumper, Place, StatementKind,
+    TerminatorKind,
 };
 use rustc_middle::ty::significant_drop_order::{
     extract_component_with_significant_dtor, ty_dtor_span,
@@ -227,7 +228,10 @@ pub(crate) fn run_lint<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<
         return;
     }
 
-    dump_mir(tcx, false, "lint_tail_expr_drop_order", &0 as _, body, |_, _| Ok(()));
+    if let Some(dumper) = MirDumper::new(tcx, "lint_tail_expr_drop_order", body) {
+        dumper.dump_mir(body);
+    }
+
     let locals_with_user_names = collect_user_names(body);
     let is_closure_like = tcx.is_closure_like(def_id.to_def_id());
 
@@ -336,9 +340,9 @@ pub(crate) fn run_lint<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<
             // Suppose that all BIDs point into the same local,
             // we can remove the this local from the observed drops,
             // so that we can focus our diagnosis more on the others.
-            if candidates.iter().all(|&(_, place)| candidates[0].1.local == place.local) {
+            if let Ok(local) = candidates.iter().map(|&(_, place)| place.local).all_equal_value() {
                 for path_idx in all_locals_dropped.iter() {
-                    if move_data.move_paths[path_idx].place.local == candidates[0].1.local {
+                    if move_data.move_paths[path_idx].place.local == local {
                         to_exclude.insert(path_idx);
                     }
                 }
diff --git a/compiler/rustc_mir_transform/src/nrvo.rs b/compiler/rustc_mir_transform/src/nrvo.rs
deleted file mode 100644
index 965002aae04..00000000000
--- a/compiler/rustc_mir_transform/src/nrvo.rs
+++ /dev/null
@@ -1,234 +0,0 @@
-//! See the docs for [`RenameReturnPlace`].
-
-use rustc_hir::Mutability;
-use rustc_index::bit_set::DenseBitSet;
-use rustc_middle::bug;
-use rustc_middle::mir::visit::{MutVisitor, NonUseContext, PlaceContext, Visitor};
-use rustc_middle::mir::{self, BasicBlock, Local, Location};
-use rustc_middle::ty::TyCtxt;
-use tracing::{debug, trace};
-
-/// This pass looks for MIR that always copies the same local into the return place and eliminates
-/// the copy by renaming all uses of that local to `_0`.
-///
-/// This allows LLVM to perform an optimization similar to the named return value optimization
-/// (NRVO) that is guaranteed in C++. This avoids a stack allocation and `memcpy` for the
-/// relatively common pattern of allocating a buffer on the stack, mutating it, and returning it by
-/// value like so:
-///
-/// ```rust
-/// fn foo(init: fn(&mut [u8; 1024])) -> [u8; 1024] {
-///     let mut buf = [0; 1024];
-///     init(&mut buf);
-///     buf
-/// }
-/// ```
-///
-/// For now, this pass is very simple and only capable of eliminating a single copy. A more general
-/// version of copy propagation, such as the one based on non-overlapping live ranges in [#47954] and
-/// [#71003], could yield even more benefits.
-///
-/// [#47954]: https://github.com/rust-lang/rust/pull/47954
-/// [#71003]: https://github.com/rust-lang/rust/pull/71003
-pub(super) struct RenameReturnPlace;
-
-impl<'tcx> crate::MirPass<'tcx> for RenameReturnPlace {
-    fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
-        // unsound: #111005
-        sess.mir_opt_level() > 0 && sess.opts.unstable_opts.unsound_mir_opts
-    }
-
-    fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut mir::Body<'tcx>) {
-        let def_id = body.source.def_id();
-        let Some(returned_local) = local_eligible_for_nrvo(body) else {
-            debug!("`{:?}` was ineligible for NRVO", def_id);
-            return;
-        };
-
-        debug!(
-            "`{:?}` was eligible for NRVO, making {:?} the return place",
-            def_id, returned_local
-        );
-
-        RenameToReturnPlace { tcx, to_rename: returned_local }.visit_body_preserves_cfg(body);
-
-        // Clean up the `NOP`s we inserted for statements made useless by our renaming.
-        for block_data in body.basic_blocks.as_mut_preserves_cfg() {
-            block_data.statements.retain(|stmt| stmt.kind != mir::StatementKind::Nop);
-        }
-
-        // Overwrite the debuginfo of `_0` with that of the renamed local.
-        let (renamed_decl, ret_decl) =
-            body.local_decls.pick2_mut(returned_local, mir::RETURN_PLACE);
-
-        // Sometimes, the return place is assigned a local of a different but coercible type, for
-        // example `&mut T` instead of `&T`. Overwriting the `LocalInfo` for the return place means
-        // its type may no longer match the return type of its function. This doesn't cause a
-        // problem in codegen because these two types are layout-compatible, but may be unexpected.
-        debug!("_0: {:?} = {:?}: {:?}", ret_decl.ty, returned_local, renamed_decl.ty);
-        ret_decl.clone_from(renamed_decl);
-
-        // The return place is always mutable.
-        ret_decl.mutability = Mutability::Mut;
-    }
-
-    fn is_required(&self) -> bool {
-        false
-    }
-}
-
-/// MIR that is eligible for the NRVO must fulfill two conditions:
-///   1. The return place must not be read prior to the `Return` terminator.
-///   2. A simple assignment of a whole local to the return place (e.g., `_0 = _1`) must be the
-///      only definition of the return place reaching the `Return` terminator.
-///
-/// If the MIR fulfills both these conditions, this function returns the `Local` that is assigned
-/// to the return place along all possible paths through the control-flow graph.
-fn local_eligible_for_nrvo(body: &mir::Body<'_>) -> Option<Local> {
-    if IsReturnPlaceRead::run(body) {
-        return None;
-    }
-
-    let mut copied_to_return_place = None;
-    for block in body.basic_blocks.indices() {
-        // Look for blocks with a `Return` terminator.
-        if !matches!(body[block].terminator().kind, mir::TerminatorKind::Return) {
-            continue;
-        }
-
-        // Look for an assignment of a single local to the return place prior to the `Return`.
-        let returned_local = find_local_assigned_to_return_place(block, body)?;
-        match body.local_kind(returned_local) {
-            // FIXME: Can we do this for arguments as well?
-            mir::LocalKind::Arg => return None,
-
-            mir::LocalKind::ReturnPointer => bug!("Return place was assigned to itself?"),
-            mir::LocalKind::Temp => {}
-        }
-
-        // If multiple different locals are copied to the return place. We can't pick a
-        // single one to rename.
-        if copied_to_return_place.is_some_and(|old| old != returned_local) {
-            return None;
-        }
-
-        copied_to_return_place = Some(returned_local);
-    }
-
-    copied_to_return_place
-}
-
-fn find_local_assigned_to_return_place(start: BasicBlock, body: &mir::Body<'_>) -> Option<Local> {
-    let mut block = start;
-    let mut seen = DenseBitSet::new_empty(body.basic_blocks.len());
-
-    // Iterate as long as `block` has exactly one predecessor that we have not yet visited.
-    while seen.insert(block) {
-        trace!("Looking for assignments to `_0` in {:?}", block);
-
-        let local = body[block].statements.iter().rev().find_map(as_local_assigned_to_return_place);
-        if local.is_some() {
-            return local;
-        }
-
-        match body.basic_blocks.predecessors()[block].as_slice() {
-            &[pred] => block = pred,
-            _ => return None,
-        }
-    }
-
-    None
-}
-
-// If this statement is an assignment of an unprojected local to the return place,
-// return that local.
-fn as_local_assigned_to_return_place(stmt: &mir::Statement<'_>) -> Option<Local> {
-    if let mir::StatementKind::Assign(box (lhs, rhs)) = &stmt.kind {
-        if lhs.as_local() == Some(mir::RETURN_PLACE) {
-            if let mir::Rvalue::Use(mir::Operand::Copy(rhs) | mir::Operand::Move(rhs)) = rhs {
-                return rhs.as_local();
-            }
-        }
-    }
-
-    None
-}
-
-struct RenameToReturnPlace<'tcx> {
-    to_rename: Local,
-    tcx: TyCtxt<'tcx>,
-}
-
-/// Replaces all uses of `self.to_rename` with `_0`.
-impl<'tcx> MutVisitor<'tcx> for RenameToReturnPlace<'tcx> {
-    fn tcx(&self) -> TyCtxt<'tcx> {
-        self.tcx
-    }
-
-    fn visit_statement(&mut self, stmt: &mut mir::Statement<'tcx>, loc: Location) {
-        // Remove assignments of the local being replaced to the return place, since it is now the
-        // return place:
-        //     _0 = _1
-        if as_local_assigned_to_return_place(stmt) == Some(self.to_rename) {
-            stmt.kind = mir::StatementKind::Nop;
-            return;
-        }
-
-        // Remove storage annotations for the local being replaced:
-        //     StorageLive(_1)
-        if let mir::StatementKind::StorageLive(local) | mir::StatementKind::StorageDead(local) =
-            stmt.kind
-        {
-            if local == self.to_rename {
-                stmt.kind = mir::StatementKind::Nop;
-                return;
-            }
-        }
-
-        self.super_statement(stmt, loc)
-    }
-
-    fn visit_terminator(&mut self, terminator: &mut mir::Terminator<'tcx>, loc: Location) {
-        // Ignore the implicit "use" of the return place in a `Return` statement.
-        if let mir::TerminatorKind::Return = terminator.kind {
-            return;
-        }
-
-        self.super_terminator(terminator, loc);
-    }
-
-    fn visit_local(&mut self, l: &mut Local, ctxt: PlaceContext, _: Location) {
-        if *l == mir::RETURN_PLACE {
-            assert_eq!(ctxt, PlaceContext::NonUse(NonUseContext::VarDebugInfo));
-        } else if *l == self.to_rename {
-            *l = mir::RETURN_PLACE;
-        }
-    }
-}
-
-struct IsReturnPlaceRead(bool);
-
-impl IsReturnPlaceRead {
-    fn run(body: &mir::Body<'_>) -> bool {
-        let mut vis = IsReturnPlaceRead(false);
-        vis.visit_body(body);
-        vis.0
-    }
-}
-
-impl<'tcx> Visitor<'tcx> for IsReturnPlaceRead {
-    fn visit_local(&mut self, l: Local, ctxt: PlaceContext, _: Location) {
-        if l == mir::RETURN_PLACE && ctxt.is_use() && !ctxt.is_place_assignment() {
-            self.0 = true;
-        }
-    }
-
-    fn visit_terminator(&mut self, terminator: &mir::Terminator<'tcx>, loc: Location) {
-        // Ignore the implicit "use" of the return place in a `Return` statement.
-        if let mir::TerminatorKind::Return = terminator.kind {
-            return;
-        }
-
-        self.super_terminator(terminator, loc);
-    }
-}
diff --git a/compiler/rustc_mir_transform/src/pass_manager.rs b/compiler/rustc_mir_transform/src/pass_manager.rs
index 7a8d3ba1ff1..ab09cdf787e 100644
--- a/compiler/rustc_mir_transform/src/pass_manager.rs
+++ b/compiler/rustc_mir_transform/src/pass_manager.rs
@@ -2,7 +2,7 @@ use std::cell::RefCell;
 use std::collections::hash_map::Entry;
 
 use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
-use rustc_middle::mir::{self, Body, MirPhase, RuntimePhase};
+use rustc_middle::mir::{Body, MirDumper, MirPhase, RuntimePhase};
 use rustc_middle::ty::TyCtxt;
 use rustc_session::Session;
 use tracing::trace;
@@ -41,19 +41,40 @@ fn to_profiler_name(type_name: &'static str) -> &'static str {
     })
 }
 
-// const wrapper for `if let Some((_, tail)) = name.rsplit_once(':') { tail } else { name }`
-const fn c_name(name: &'static str) -> &'static str {
+// A function that simplifies a pass's type_name. E.g. `Baz`, `Baz<'_>`,
+// `foo::bar::Baz`, and `foo::bar::Baz<'a, 'b>` all become `Baz`.
+//
+// It's `const` for perf reasons: it's called a lot, and doing the string
+// operations at runtime causes a non-trivial slowdown. If
+// `split_once`/`rsplit_once` become `const` its body could be simplified to
+// this:
+// ```ignore (fragment)
+// let name = if let Some((_, tail)) = name.rsplit_once(':') { tail } else { name };
+// let name = if let Some((head, _)) = name.split_once('<') { head } else { name };
+// name
+// ```
+const fn simplify_pass_type_name(name: &'static str) -> &'static str {
     // FIXME(const-hack) Simplify the implementation once more `str` methods get const-stable.
-    // and inline into call site
+
+    // Work backwards from the end. If a ':' is hit, strip it and everything before it.
     let bytes = name.as_bytes();
     let mut i = bytes.len();
     while i > 0 && bytes[i - 1] != b':' {
-        i = i - 1;
+        i -= 1;
     }
     let (_, bytes) = bytes.split_at(i);
+
+    // Work forwards from the start of what's left. If a '<' is hit, strip it and everything after
+    // it.
+    let mut i = 0;
+    while i < bytes.len() && bytes[i] != b'<' {
+        i += 1;
+    }
+    let (bytes, _) = bytes.split_at(i);
+
     match std::str::from_utf8(bytes) {
         Ok(name) => name,
-        Err(_) => name,
+        Err(_) => panic!(),
     }
 }
 
@@ -62,12 +83,7 @@ const fn c_name(name: &'static str) -> &'static str {
 /// loop that goes over each available MIR and applies `run_pass`.
 pub(super) trait MirPass<'tcx> {
     fn name(&self) -> &'static str {
-        // FIXME(const-hack) Simplify the implementation once more `str` methods get const-stable.
-        // See copypaste in `MirLint`
-        const {
-            let name = std::any::type_name::<Self>();
-            c_name(name)
-        }
+        const { simplify_pass_type_name(std::any::type_name::<Self>()) }
     }
 
     fn profiler_name(&self) -> &'static str {
@@ -101,12 +117,7 @@ pub(super) trait MirPass<'tcx> {
 /// disabled (via the `Lint` adapter).
 pub(super) trait MirLint<'tcx> {
     fn name(&self) -> &'static str {
-        // FIXME(const-hack) Simplify the implementation once more `str` methods get const-stable.
-        // See copypaste in `MirPass`
-        const {
-            let name = std::any::type_name::<Self>();
-            c_name(name)
-        }
+        const { simplify_pass_type_name(std::any::type_name::<Self>()) }
     }
 
     fn is_enabled(&self, _sess: &Session) -> bool {
@@ -270,16 +281,22 @@ fn run_passes_inner<'tcx>(
         let lint = tcx.sess.opts.unstable_opts.lint_mir;
 
         for pass in passes {
-            let name = pass.name();
+            let pass_name = pass.name();
 
             if !should_run_pass(tcx, *pass, optimizations) {
                 continue;
             };
 
-            let dump_enabled = pass.is_mir_dump_enabled();
+            let dumper = if pass.is_mir_dump_enabled()
+                && let Some(dumper) = MirDumper::new(tcx, pass_name, body)
+            {
+                Some(dumper.set_show_pass_num().set_disambiguator(&"before"))
+            } else {
+                None
+            };
 
-            if dump_enabled {
-                dump_mir_for_pass(tcx, body, name, false);
+            if let Some(dumper) = dumper.as_ref() {
+                dumper.dump_mir(body);
             }
 
             if let Some(prof_arg) = &prof_arg {
@@ -291,14 +308,15 @@ fn run_passes_inner<'tcx>(
                 pass.run_pass(tcx, body);
             }
 
-            if dump_enabled {
-                dump_mir_for_pass(tcx, body, name, true);
+            if let Some(dumper) = dumper {
+                dumper.set_disambiguator(&"after").dump_mir(body);
             }
+
             if validate {
-                validate_body(tcx, body, format!("after pass {name}"));
+                validate_body(tcx, body, format!("after pass {pass_name}"));
             }
             if lint {
-                lint_body(tcx, body, format!("after pass {name}"));
+                lint_body(tcx, body, format!("after pass {pass_name}"));
             }
 
             body.pass_count += 1;
@@ -334,18 +352,9 @@ pub(super) fn validate_body<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>, when
     validate::Validator { when }.run_pass(tcx, body);
 }
 
-fn dump_mir_for_pass<'tcx>(tcx: TyCtxt<'tcx>, body: &Body<'tcx>, pass_name: &str, is_after: bool) {
-    mir::dump_mir(
-        tcx,
-        true,
-        pass_name,
-        if is_after { &"after" } else { &"before" },
-        body,
-        |_, _| Ok(()),
-    );
-}
-
 pub(super) fn dump_mir_for_phase_change<'tcx>(tcx: TyCtxt<'tcx>, body: &Body<'tcx>) {
     assert_eq!(body.pass_count, 0);
-    mir::dump_mir(tcx, true, body.phase.name(), &"after", body, |_, _| Ok(()))
+    if let Some(dumper) = MirDumper::new(tcx, body.phase.name(), body) {
+        dumper.set_show_pass_num().set_disambiguator(&"after").dump_mir(body)
+    }
 }
diff --git a/compiler/rustc_mir_transform/src/promote_consts.rs b/compiler/rustc_mir_transform/src/promote_consts.rs
index 4e8f30e077b..a0b0c8c990f 100644
--- a/compiler/rustc_mir_transform/src/promote_consts.rs
+++ b/compiler/rustc_mir_transform/src/promote_consts.rs
@@ -437,9 +437,7 @@ impl<'tcx> Validator<'_, 'tcx> {
                 self.validate_operand(op)?
             }
 
-            Rvalue::Discriminant(place) | Rvalue::Len(place) => {
-                self.validate_place(place.as_ref())?
-            }
+            Rvalue::Discriminant(place) => self.validate_place(place.as_ref())?,
 
             Rvalue::ThreadLocalRef(_) => return Err(Unpromotable),
 
@@ -875,7 +873,8 @@ impl<'a, 'tcx> Promoter<'a, 'tcx> {
             let mut promoted_operand = |ty, span| {
                 promoted.span = span;
                 promoted.local_decls[RETURN_PLACE] = LocalDecl::new(ty, span);
-                let args = tcx.erase_regions(GenericArgs::identity_for_item(tcx, def));
+                let args =
+                    tcx.erase_and_anonymize_regions(GenericArgs::identity_for_item(tcx, def));
                 let uneval =
                     mir::UnevaluatedConst { def, args, promoted: Some(next_promoted_index) };
 
@@ -997,12 +996,11 @@ fn promote_candidates<'tcx>(
     for candidate in candidates.into_iter().rev() {
         let Location { block, statement_index } = candidate.location;
         if let StatementKind::Assign(box (place, _)) = &body[block].statements[statement_index].kind
+            && let Some(local) = place.as_local()
         {
-            if let Some(local) = place.as_local() {
-                if temps[local] == TempState::PromotedOut {
-                    // Already promoted.
-                    continue;
-                }
+            if temps[local] == TempState::PromotedOut {
+                // Already promoted.
+                continue;
             }
         }
 
@@ -1066,11 +1064,11 @@ fn promote_candidates<'tcx>(
             _ => true,
         });
         let terminator = block.terminator_mut();
-        if let TerminatorKind::Drop { place, target, .. } = &terminator.kind {
-            if let Some(index) = place.as_local() {
-                if promoted(index) {
-                    terminator.kind = TerminatorKind::Goto { target: *target };
-                }
+        if let TerminatorKind::Drop { place, target, .. } = &terminator.kind
+            && let Some(index) = place.as_local()
+        {
+            if promoted(index) {
+                terminator.kind = TerminatorKind::Goto { target: *target };
             }
         }
     }
diff --git a/compiler/rustc_mir_transform/src/ref_prop.rs b/compiler/rustc_mir_transform/src/ref_prop.rs
index d1c2d6b508f..6f61215cee2 100644
--- a/compiler/rustc_mir_transform/src/ref_prop.rs
+++ b/compiler/rustc_mir_transform/src/ref_prop.rs
@@ -79,6 +79,7 @@ impl<'tcx> crate::MirPass<'tcx> for ReferencePropagation {
     #[instrument(level = "trace", skip(self, tcx, body))]
     fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
         debug!(def_id = ?body.source.def_id());
+        move_to_copy_pointers(tcx, body);
         while propagate_ssa(tcx, body) {}
     }
 
@@ -87,11 +88,43 @@ impl<'tcx> crate::MirPass<'tcx> for ReferencePropagation {
     }
 }
 
+/// The SSA analysis done by [`SsaLocals`] treats [`Operand::Move`] as a read, even though in
+/// general [`Operand::Move`] represents pass-by-pointer where the callee can overwrite the
+/// pointee (Miri always considers the place deinitialized). CopyProp has a similar trick to
+/// turn [`Operand::Move`] into [`Operand::Copy`] when required for an optimization, but in this
+/// pass we just turn all moves of pointers into copies because pointers should be by-value anyway.
+fn move_to_copy_pointers<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
+    let mut visitor = MoveToCopyVisitor { tcx, local_decls: &body.local_decls };
+    for (bb, data) in body.basic_blocks.as_mut_preserves_cfg().iter_enumerated_mut() {
+        visitor.visit_basic_block_data(bb, data);
+    }
+
+    struct MoveToCopyVisitor<'a, 'tcx> {
+        tcx: TyCtxt<'tcx>,
+        local_decls: &'a IndexVec<Local, LocalDecl<'tcx>>,
+    }
+
+    impl<'a, 'tcx> MutVisitor<'tcx> for MoveToCopyVisitor<'a, 'tcx> {
+        fn tcx(&self) -> TyCtxt<'tcx> {
+            self.tcx
+        }
+
+        fn visit_operand(&mut self, operand: &mut Operand<'tcx>, loc: Location) {
+            if let Operand::Move(place) = *operand {
+                if place.ty(self.local_decls, self.tcx).ty.is_any_ptr() {
+                    *operand = Operand::Copy(place);
+                }
+            }
+            self.super_operand(operand, loc);
+        }
+    }
+}
+
 fn propagate_ssa<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool {
     let typing_env = body.typing_env(tcx);
     let ssa = SsaLocals::new(tcx, body, typing_env);
 
-    let mut replacer = compute_replacement(tcx, body, &ssa);
+    let mut replacer = compute_replacement(tcx, body, ssa);
     debug!(?replacer.targets);
     debug!(?replacer.allowed_replacements);
     debug!(?replacer.storage_to_remove);
@@ -119,7 +152,7 @@ enum Value<'tcx> {
 fn compute_replacement<'tcx>(
     tcx: TyCtxt<'tcx>,
     body: &Body<'tcx>,
-    ssa: &SsaLocals,
+    ssa: SsaLocals,
 ) -> Replacer<'tcx> {
     let always_live_locals = always_storage_live_locals(body);
 
@@ -138,7 +171,7 @@ fn compute_replacement<'tcx>(
     // reborrowed references.
     let mut storage_to_remove = DenseBitSet::new_empty(body.local_decls.len());
 
-    let fully_replaceable_locals = fully_replaceable_locals(ssa);
+    let fully_replaceable_locals = fully_replaceable_locals(&ssa);
 
     // Returns true iff we can use `place` as a pointee.
     //
diff --git a/compiler/rustc_mir_transform/src/remove_noop_landing_pads.rs b/compiler/rustc_mir_transform/src/remove_noop_landing_pads.rs
index 797056ad52d..5b6d7ffb511 100644
--- a/compiler/rustc_mir_transform/src/remove_noop_landing_pads.rs
+++ b/compiler/rustc_mir_transform/src/remove_noop_landing_pads.rs
@@ -48,14 +48,13 @@ impl<'tcx> crate::MirPass<'tcx> for RemoveNoopLandingPads {
         let postorder: Vec<_> = traversal::postorder(body).map(|(bb, _)| bb).collect();
         for bb in postorder {
             debug!("  processing {:?}", bb);
-            if let Some(unwind) = body[bb].terminator_mut().unwind_mut() {
-                if let UnwindAction::Cleanup(unwind_bb) = *unwind {
-                    if nop_landing_pads.contains(unwind_bb) {
-                        debug!("    removing noop landing pad");
-                        landing_pads_removed += 1;
-                        *unwind = UnwindAction::Continue;
-                    }
-                }
+            if let Some(unwind) = body[bb].terminator_mut().unwind_mut()
+                && let UnwindAction::Cleanup(unwind_bb) = *unwind
+                && nop_landing_pads.contains(unwind_bb)
+            {
+                debug!("    removing noop landing pad");
+                landing_pads_removed += 1;
+                *unwind = UnwindAction::Continue;
             }
 
             body[bb].terminator_mut().successors_mut(|target| {
diff --git a/compiler/rustc_mir_transform/src/remove_unneeded_drops.rs b/compiler/rustc_mir_transform/src/remove_unneeded_drops.rs
index 43f80508e4a..6c2dfc59da2 100644
--- a/compiler/rustc_mir_transform/src/remove_unneeded_drops.rs
+++ b/compiler/rustc_mir_transform/src/remove_unneeded_drops.rs
@@ -4,7 +4,13 @@
 //! useful because (unlike MIR building) it runs after type checking, so it can make use of
 //! `TypingMode::PostAnalysis` to provide more precise type information, especially about opaque
 //! types.
+//!
+//! When we're optimizing, we also remove calls to `drop_in_place<T>` when `T` isn't `needs_drop`,
+//! as those are essentially equivalent to `Drop` terminators. While the compiler doesn't insert
+//! them automatically, preferring the built-in instead, they're common in generic code (such as
+//! `Vec::truncate`) so removing them from things like inlined `Vec<u8>` is helpful.
 
+use rustc_hir::LangItem;
 use rustc_middle::mir::*;
 use rustc_middle::ty::TyCtxt;
 use tracing::{debug, trace};
@@ -21,15 +27,26 @@ impl<'tcx> crate::MirPass<'tcx> for RemoveUnneededDrops {
         let mut should_simplify = false;
         for block in body.basic_blocks.as_mut() {
             let terminator = block.terminator_mut();
-            if let TerminatorKind::Drop { place, target, .. } = terminator.kind {
-                let ty = place.ty(&body.local_decls, tcx);
-                if ty.ty.needs_drop(tcx, typing_env) {
-                    continue;
+            let (ty, target) = match terminator.kind {
+                TerminatorKind::Drop { place, target, .. } => {
+                    (place.ty(&body.local_decls, tcx).ty, target)
+                }
+                TerminatorKind::Call { ref func, target: Some(target), .. }
+                    if tcx.sess.mir_opt_level() > 0
+                        && let Some((def_id, generics)) = func.const_fn_def()
+                        && tcx.is_lang_item(def_id, LangItem::DropInPlace) =>
+                {
+                    (generics.type_at(0), target)
                 }
-                debug!("SUCCESS: replacing `drop` with goto({:?})", target);
-                terminator.kind = TerminatorKind::Goto { target };
-                should_simplify = true;
+                _ => continue,
+            };
+
+            if ty.needs_drop(tcx, typing_env) {
+                continue;
             }
+            debug!("SUCCESS: replacing `drop` with goto({:?})", target);
+            terminator.kind = TerminatorKind::Goto { target };
+            should_simplify = true;
         }
 
         // if we applied optimizations, we potentially have some cfg to cleanup to
diff --git a/compiler/rustc_mir_transform/src/shim.rs b/compiler/rustc_mir_transform/src/shim.rs
index cdbc74cdfa8..bca8ffb693b 100644
--- a/compiler/rustc_mir_transform/src/shim.rs
+++ b/compiler/rustc_mir_transform/src/shim.rs
@@ -75,7 +75,7 @@ fn make_shim<'tcx>(tcx: TyCtxt<'tcx>, instance: ty::InstanceKind<'tcx>) -> Body<
             build_call_shim(tcx, instance, Some(adjustment), CallKind::Direct(def_id))
         }
         ty::InstanceKind::FnPtrShim(def_id, ty) => {
-            let trait_ = tcx.trait_of_item(def_id).unwrap();
+            let trait_ = tcx.parent(def_id);
             // Supports `Fn` or `async Fn` traits.
             let adjustment = match tcx
                 .fn_trait_kind_from_def_id(trait_)
@@ -434,8 +434,8 @@ pub(super) struct DropShimElaborator<'a, 'tcx> {
 }
 
 impl fmt::Debug for DropShimElaborator<'_, '_> {
-    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
-        Ok(())
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        f.debug_struct("DropShimElaborator").finish_non_exhaustive()
     }
 }
 
@@ -1242,14 +1242,12 @@ fn build_construct_coroutine_by_move_shim<'tcx>(
 
     let body =
         new_body(source, IndexVec::from_elem_n(start_block, 1), locals, sig.inputs().len(), span);
-    dump_mir(
-        tcx,
-        false,
-        if receiver_by_ref { "coroutine_closure_by_ref" } else { "coroutine_closure_by_move" },
-        &0,
-        &body,
-        |_, _| Ok(()),
-    );
+
+    let pass_name =
+        if receiver_by_ref { "coroutine_closure_by_ref" } else { "coroutine_closure_by_move" };
+    if let Some(dumper) = MirDumper::new(tcx, pass_name, &body) {
+        dumper.dump_mir(&body);
+    }
 
     body
 }
diff --git a/compiler/rustc_mir_transform/src/simplify.rs b/compiler/rustc_mir_transform/src/simplify.rs
index db933da6413..75917d23883 100644
--- a/compiler/rustc_mir_transform/src/simplify.rs
+++ b/compiler/rustc_mir_transform/src/simplify.rs
@@ -34,6 +34,7 @@
 //! The normal logic that a program with UB can be changed to do anything does not apply to
 //! pre-"runtime" MIR!
 
+use itertools::Itertools as _;
 use rustc_index::{Idx, IndexSlice, IndexVec};
 use rustc_middle::mir::visit::{MutVisitor, MutatingUseContext, PlaceContext, Visitor};
 use rustc_middle::mir::*;
@@ -225,6 +226,7 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> {
             current = target;
         }
         let last = current;
+        *changed |= *start != last;
         *start = last;
         while let Some((current, mut terminator)) = terminators.pop() {
             let Terminator { kind: TerminatorKind::Goto { ref mut target }, .. } = terminator
@@ -287,20 +289,13 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> {
             return false;
         };
 
-        let first_succ = {
-            if let Some(first_succ) = terminator.successors().next() {
-                if terminator.successors().all(|s| s == first_succ) {
-                    let count = terminator.successors().count();
-                    self.pred_count[first_succ] -= (count - 1) as u32;
-                    first_succ
-                } else {
-                    return false;
-                }
-            } else {
-                return false;
-            }
+        let Ok(first_succ) = terminator.successors().all_equal_value() else {
+            return false;
         };
 
+        let count = terminator.successors().count();
+        self.pred_count[first_succ] -= (count - 1) as u32;
+
         debug!("simplifying branch {:?}", terminator);
         terminator.kind = TerminatorKind::Goto { target: first_succ };
         true
diff --git a/compiler/rustc_mir_transform/src/sroa.rs b/compiler/rustc_mir_transform/src/sroa.rs
index 7c6ccc89c4f..38769885f36 100644
--- a/compiler/rustc_mir_transform/src/sroa.rs
+++ b/compiler/rustc_mir_transform/src/sroa.rs
@@ -1,4 +1,4 @@
-use rustc_abi::{FIRST_VARIANT, FieldIdx};
+use rustc_abi::FieldIdx;
 use rustc_data_structures::flat_map_in_place::FlatMapInPlace;
 use rustc_hir::LangItem;
 use rustc_index::IndexVec;
@@ -32,7 +32,7 @@ impl<'tcx> crate::MirPass<'tcx> for ScalarReplacementOfAggregates {
         let typing_env = body.typing_env(tcx);
         loop {
             debug!(?excluded);
-            let escaping = escaping_locals(tcx, typing_env, &excluded, body);
+            let escaping = escaping_locals(tcx, &excluded, body);
             debug!(?escaping);
             let replacements = compute_flattening(tcx, typing_env, body, escaping);
             debug!(?replacements);
@@ -64,7 +64,6 @@ impl<'tcx> crate::MirPass<'tcx> for ScalarReplacementOfAggregates {
 ///   client code.
 fn escaping_locals<'tcx>(
     tcx: TyCtxt<'tcx>,
-    typing_env: ty::TypingEnv<'tcx>,
     excluded: &DenseBitSet<Local>,
     body: &Body<'tcx>,
 ) -> DenseBitSet<Local> {
@@ -72,31 +71,16 @@ fn escaping_locals<'tcx>(
         if ty.is_union() || ty.is_enum() {
             return true;
         }
-        if let ty::Adt(def, _args) = ty.kind() {
-            if def.repr().simd() {
-                // Exclude #[repr(simd)] types so that they are not de-optimized into an array
-                return true;
-            }
-            if tcx.is_lang_item(def.did(), LangItem::DynMetadata) {
-                // codegen wants to see the `DynMetadata<T>`,
-                // not the inner reference-to-opaque-type.
-                return true;
-            }
-            // We already excluded unions and enums, so this ADT must have one variant
-            let variant = def.variant(FIRST_VARIANT);
-            if variant.fields.len() > 1 {
-                // If this has more than one field, it cannot be a wrapper that only provides a
-                // niche, so we do not want to automatically exclude it.
-                return false;
-            }
-            let Ok(layout) = tcx.layout_of(typing_env.as_query_input(ty)) else {
-                // We can't get the layout
-                return true;
-            };
-            if layout.layout.largest_niche().is_some() {
-                // This type has a niche
-                return true;
-            }
+        if let ty::Adt(def, _args) = ty.kind()
+            && (def.repr().simd() || tcx.is_lang_item(def.did(), LangItem::DynMetadata))
+        {
+            // Exclude #[repr(simd)] types so that they are not de-optimized into an array
+            // (MCP#838 banned projections into SIMD types, but if the value is unused
+            // this pass sees "all the uses are of the fields" and expands it.)
+
+            // codegen wants to see the `DynMetadata<T>`,
+            // not the inner reference-to-opaque-type.
+            return true;
         }
         // Default for non-ADTs
         false
diff --git a/compiler/rustc_mir_transform/src/ssa.rs b/compiler/rustc_mir_transform/src/ssa.rs
index cd9a7f4a39d..73c249a3c8c 100644
--- a/compiler/rustc_mir_transform/src/ssa.rs
+++ b/compiler/rustc_mir_transform/src/ssa.rs
@@ -225,6 +225,9 @@ impl SsaVisitor<'_, '_> {
 
 impl<'tcx> Visitor<'tcx> for SsaVisitor<'_, 'tcx> {
     fn visit_local(&mut self, local: Local, ctxt: PlaceContext, loc: Location) {
+        if ctxt.may_observe_address() {
+            self.borrowed_locals.insert(local);
+        }
         match ctxt {
             PlaceContext::MutatingUse(MutatingUseContext::Projection)
             | PlaceContext::NonMutatingUse(NonMutatingUseContext::Projection) => bug!(),
@@ -237,7 +240,6 @@ impl<'tcx> Visitor<'tcx> for SsaVisitor<'_, 'tcx> {
             PlaceContext::NonMutatingUse(
                 NonMutatingUseContext::SharedBorrow | NonMutatingUseContext::FakeBorrow,
             ) => {
-                self.borrowed_locals.insert(local);
                 self.check_dominates(local, loc);
                 self.direct_uses[local] += 1;
             }
diff --git a/compiler/rustc_mir_transform/src/unreachable_prop.rs b/compiler/rustc_mir_transform/src/unreachable_prop.rs
index 13fb5b3e56f..c417a9272f2 100644
--- a/compiler/rustc_mir_transform/src/unreachable_prop.rs
+++ b/compiler/rustc_mir_transform/src/unreachable_prop.rs
@@ -75,7 +75,7 @@ fn remove_successors_from_switch<'tcx>(
     let is_unreachable = |bb| unreachable_blocks.contains(&bb);
 
     // If there are multiple targets, we want to keep information about reachability for codegen.
-    // For example (see tests/codegen/match-optimizes-away.rs)
+    // For example (see tests/codegen-llvm/match-optimizes-away.rs)
     //
     // pub enum Two { A, B }
     // pub fn identity(x: Two) -> Two {
diff --git a/compiler/rustc_mir_transform/src/validate.rs b/compiler/rustc_mir_transform/src/validate.rs
index cbb9bbfd12f..c8a9a88dc3f 100644
--- a/compiler/rustc_mir_transform/src/validate.rs
+++ b/compiler/rustc_mir_transform/src/validate.rs
@@ -1,9 +1,9 @@
 //! Validates the MIR to ensure that invariants are upheld.
 
 use rustc_abi::{ExternAbi, FIRST_VARIANT, Size};
-use rustc_attr_data_structures::InlineAttr;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_hir::LangItem;
+use rustc_hir::attrs::InlineAttr;
 use rustc_index::IndexVec;
 use rustc_index::bit_set::DenseBitSet;
 use rustc_infer::infer::TyCtxtInferExt;
@@ -80,15 +80,14 @@ impl<'tcx> crate::MirPass<'tcx> for Validator {
             cfg_checker.fail(location, msg);
         }
 
-        if let MirPhase::Runtime(_) = body.phase {
-            if let ty::InstanceKind::Item(_) = body.source.instance {
-                if body.has_free_regions() {
-                    cfg_checker.fail(
-                        Location::START,
-                        format!("Free regions in optimized {} MIR", body.phase.name()),
-                    );
-                }
-            }
+        if let MirPhase::Runtime(_) = body.phase
+            && let ty::InstanceKind::Item(_) = body.source.instance
+            && body.has_free_regions()
+        {
+            cfg_checker.fail(
+                Location::START,
+                format!("Free regions in optimized {} MIR", body.phase.name()),
+            );
         }
     }
 
@@ -119,14 +118,16 @@ impl<'a, 'tcx> CfgChecker<'a, 'tcx> {
     #[track_caller]
     fn fail(&self, location: Location, msg: impl AsRef<str>) {
         // We might see broken MIR when other errors have already occurred.
-        assert!(
-            self.tcx.dcx().has_errors().is_some(),
-            "broken MIR in {:?} ({}) at {:?}:\n{}",
-            self.body.source.instance,
-            self.when,
-            location,
-            msg.as_ref(),
-        );
+        if self.tcx.dcx().has_errors().is_none() {
+            span_bug!(
+                self.body.source_info(location).span,
+                "broken MIR in {:?} ({}) at {:?}:\n{}",
+                self.body.source.instance,
+                self.when,
+                location,
+                msg.as_ref(),
+            );
+        }
     }
 
     fn check_edge(&mut self, location: Location, bb: BasicBlock, edge_kind: EdgeKind) {
@@ -719,6 +720,15 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> {
                             );
                         }
 
+                        if adt_def.repr().simd() {
+                            self.fail(
+                                location,
+                                format!(
+                                    "Projecting into SIMD type {adt_def:?} is banned by MCP#838"
+                                ),
+                            );
+                        }
+
                         let var = parent_ty.variant_index.unwrap_or(FIRST_VARIANT);
                         let Some(field) = adt_def.variant(var).fields.get(f) else {
                             fail_out_of_bounds(self, location);
@@ -1054,14 +1064,6 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> {
                 }
             }
             Rvalue::Ref(..) => {}
-            Rvalue::Len(p) => {
-                let pty = p.ty(&self.body.local_decls, self.tcx).ty;
-                check_kinds!(
-                    pty,
-                    "Cannot compute length of non-array type {:?}",
-                    ty::Array(..) | ty::Slice(..)
-                );
-            }
             Rvalue::BinaryOp(op, vals) => {
                 use BinOp::*;
                 let a = vals.0.ty(&self.body.local_decls, self.tcx);