63 files changed, 1581 insertions, 510 deletions
diff --git a/compiler/rustc_borrowck/src/consumers.rs b/compiler/rustc_borrowck/src/consumers.rs
index 1c4ff5a6779..54897371410 100644
--- a/compiler/rustc_borrowck/src/consumers.rs
+++ b/compiler/rustc_borrowck/src/consumers.rs
@@ -17,7 +17,7 @@ pub use super::polonius::legacy::{
     RichLocation, RustcFacts,
 };
 pub use super::region_infer::RegionInferenceContext;
-use crate::{BorrowCheckRootCtxt, do_mir_borrowck};
+use crate::BorrowCheckRootCtxt;
 
 /// Struct used during mir borrowck to collect bodies with facts for a typeck root and all
 /// its nested bodies.
@@ -127,13 +127,6 @@ pub fn get_bodies_with_borrowck_facts(
 ) -> FxHashMap<LocalDefId, BodyWithBorrowckFacts<'_>> {
     let mut root_cx =
         BorrowCheckRootCtxt::new(tcx, root_def_id, Some(BorrowckConsumer::new(options)));
-
-    // See comment in `rustc_borrowck::mir_borrowck`
-    let nested_bodies = tcx.nested_bodies_within(root_def_id);
-    for def_id in nested_bodies {
-        root_cx.get_or_insert_nested(def_id);
-    }
-
-    do_mir_borrowck(&mut root_cx, root_def_id);
+    root_cx.do_mir_borrowck();
     root_cx.consumer.unwrap().bodies
 }
diff --git a/compiler/rustc_borrowck/src/lib.rs b/compiler/rustc_borrowck/src/lib.rs
index ce78ae203a4..5d2dda8b0e7 100644
--- a/compiler/rustc_borrowck/src/lib.rs
+++ b/compiler/rustc_borrowck/src/lib.rs
@@ -22,8 +22,10 @@ use std::ops::{ControlFlow, Deref};
 use std::rc::Rc;
 
 use borrow_set::LocalsStateAtExit;
+use polonius_engine::AllFacts;
 use root_cx::BorrowCheckRootCtxt;
 use rustc_abi::FieldIdx;
+use rustc_data_structures::frozen::Frozen;
 use rustc_data_structures::fx::{FxIndexMap, FxIndexSet};
 use rustc_data_structures::graph::dominators::Dominators;
 use rustc_errors::LintDiagnostic;
@@ -32,6 +34,7 @@ use rustc_hir::CRATE_HIR_ID;
 use rustc_hir::def_id::LocalDefId;
 use rustc_index::bit_set::MixedBitSet;
 use rustc_index::{IndexSlice, IndexVec};
+use rustc_infer::infer::outlives::env::RegionBoundPairs;
 use rustc_infer::infer::{
     InferCtxt, NllRegionVariableOrigin, RegionVariableOrigin, TyCtxtInferExt,
 };
@@ -53,7 +56,7 @@ use smallvec::SmallVec;
 use tracing::{debug, instrument};
 
 use crate::borrow_set::{BorrowData, BorrowSet};
-use crate::consumers::BodyWithBorrowckFacts;
+use crate::consumers::{BodyWithBorrowckFacts, RustcFacts};
 use crate::dataflow::{BorrowIndex, Borrowck, BorrowckDomain, Borrows};
 use crate::diagnostics::{
     AccessKind, BorrowckDiagnosticsBuffer, IllegalMoveOriginKind, MoveError, RegionName,
@@ -61,15 +64,17 @@ use crate::diagnostics::{
 use crate::path_utils::*;
 use crate::place_ext::PlaceExt;
 use crate::places_conflict::{PlaceConflictBias, places_conflict};
-use crate::polonius::PoloniusDiagnosticsContext;
 use crate::polonius::legacy::{
     PoloniusFacts, PoloniusFactsExt, PoloniusLocationTable, PoloniusOutput,
 };
+use crate::polonius::{PoloniusContext, PoloniusDiagnosticsContext};
 use crate::prefixes::PrefixSet;
 use crate::region_infer::RegionInferenceContext;
+use crate::region_infer::opaque_types::DeferredOpaqueTypeError;
 use crate::renumber::RegionCtxt;
 use crate::session_diagnostics::VarNeedNotMut;
-use crate::type_check::MirTypeckResults;
+use crate::type_check::free_region_relations::UniversalRegionRelations;
+use crate::type_check::{Locations, MirTypeckRegionConstraints, MirTypeckResults};
 
 mod borrow_set;
 mod borrowck_errors;
@@ -129,18 +134,7 @@ fn mir_borrowck(
         Ok(tcx.arena.alloc(opaque_types))
     } else {
         let mut root_cx = BorrowCheckRootCtxt::new(tcx, def, None);
-        // We need to manually borrowck all nested bodies from the HIR as
-        // we do not generate MIR for dead code. Not doing so causes us to
-        // never check closures in dead code.
-        let nested_bodies = tcx.nested_bodies_within(def);
-        for def_id in nested_bodies {
-            root_cx.get_or_insert_nested(def_id);
-        }
-
-        let PropagatedBorrowCheckResults { closure_requirements, used_mut_upvars } =
-            do_mir_borrowck(&mut root_cx, def);
-        debug_assert!(closure_requirements.is_none());
-        debug_assert!(used_mut_upvars.is_empty());
+        root_cx.do_mir_borrowck();
         root_cx.finalize()
     }
 }
@@ -153,6 +147,8 @@ struct PropagatedBorrowCheckResults<'tcx> {
     used_mut_upvars: SmallVec<[FieldIdx; 8]>,
 }
 
+type DeferredClosureRequirements<'tcx> = Vec<(LocalDefId, ty::GenericArgsRef<'tcx>, Locations)>;
+
 /// After we borrow check a closure, we are left with various
 /// requirements that we have inferred between the free regions that
 /// appear in the closure's signature or on its field types. These
@@ -291,14 +287,31 @@ impl<'tcx> ClosureOutlivesSubjectTy<'tcx> {
     }
 }
 
-/// Perform the actual borrow checking.
-///
-/// For nested bodies this should only be called through `root_cx.get_or_insert_nested`.
-#[instrument(skip(root_cx), level = "debug")]
-fn do_mir_borrowck<'tcx>(
+struct CollectRegionConstraintsResult<'tcx> {
+    infcx: BorrowckInferCtxt<'tcx>,
+    body_owned: Body<'tcx>,
+    promoted: IndexVec<Promoted, Body<'tcx>>,
+    move_data: MoveData<'tcx>,
+    borrow_set: BorrowSet<'tcx>,
+    location_table: PoloniusLocationTable,
+    location_map: Rc<DenseLocationMap>,
+    universal_region_relations: Frozen<UniversalRegionRelations<'tcx>>,
+    region_bound_pairs: Frozen<RegionBoundPairs<'tcx>>,
+    known_type_outlives_obligations: Frozen<Vec<ty::PolyTypeOutlivesPredicate<'tcx>>>,
+    constraints: MirTypeckRegionConstraints<'tcx>,
+    deferred_closure_requirements: DeferredClosureRequirements<'tcx>,
+    deferred_opaque_type_errors: Vec<DeferredOpaqueTypeError<'tcx>>,
+    polonius_facts: Option<AllFacts<RustcFacts>>,
+    polonius_context: Option<PoloniusContext>,
+}
+
+/// Start borrow checking by collecting the region constraints for
+/// the current body. This initializes the relevant data structures
+/// and then type checks the MIR body.
+fn borrowck_collect_region_constraints<'tcx>(
     root_cx: &mut BorrowCheckRootCtxt<'tcx>,
     def: LocalDefId,
-) -> PropagatedBorrowCheckResults<'tcx> {
+) -> CollectRegionConstraintsResult<'tcx> {
     let tcx = root_cx.tcx;
     let infcx = BorrowckInferCtxt::new(tcx, def, root_cx.root_def_id());
     let (input_body, promoted) = tcx.mir_promoted(def);
@@ -334,10 +347,11 @@ fn do_mir_borrowck<'tcx>(
 
     // Run the MIR type-checker.
     let MirTypeckResults {
-        mut constraints,
+        constraints,
         universal_region_relations,
         region_bound_pairs,
         known_type_outlives_obligations,
+        deferred_closure_requirements,
         polonius_context,
     } = type_check::type_check(
         root_cx,
@@ -352,16 +366,53 @@ fn do_mir_borrowck<'tcx>(
         Rc::clone(&location_map),
     );
 
-    let opaque_type_errors = region_infer::opaque_types::handle_opaque_type_uses(
-        root_cx,
-        &infcx,
-        &body,
-        &universal_region_relations,
-        &region_bound_pairs,
-        &known_type_outlives_obligations,
-        &location_map,
-        &mut constraints,
-    );
+    CollectRegionConstraintsResult {
+        infcx,
+        body_owned,
+        promoted,
+        move_data,
+        borrow_set,
+        location_table,
+        location_map,
+        universal_region_relations,
+        region_bound_pairs,
+        known_type_outlives_obligations,
+        constraints,
+        deferred_closure_requirements,
+        deferred_opaque_type_errors: Default::default(),
+        polonius_facts,
+        polonius_context,
+    }
+}
+
+/// Using the region constraints computed by [borrowck_collect_region_constraints]
+/// and the additional constraints from [BorrowCheckRootCtxt::handle_opaque_type_uses],
+/// compute the region graph and actually check for any borrowck errors.
+fn borrowck_check_region_constraints<'tcx>(
+    root_cx: &mut BorrowCheckRootCtxt<'tcx>,
+    CollectRegionConstraintsResult {
+        infcx,
+        body_owned,
+        promoted,
+        move_data,
+        borrow_set,
+        location_table,
+        location_map,
+        universal_region_relations,
+        region_bound_pairs: _,
+        known_type_outlives_obligations: _,
+        constraints,
+        deferred_closure_requirements,
+        deferred_opaque_type_errors,
+        polonius_facts,
+        polonius_context,
+    }: CollectRegionConstraintsResult<'tcx>,
+) -> PropagatedBorrowCheckResults<'tcx> {
+    assert!(!infcx.has_opaque_types_in_storage());
+    assert!(deferred_closure_requirements.is_empty());
+    let tcx = root_cx.tcx;
+    let body = &body_owned;
+    let def = body.source.def_id().expect_local();
 
     // Compute non-lexical lifetimes using the constraints computed
     // by typechecking the MIR body.
@@ -481,7 +532,7 @@ fn do_mir_borrowck<'tcx>(
 
     // Compute and report region errors, if any.
     if nll_errors.is_empty() {
-        mbcx.report_opaque_type_errors(opaque_type_errors);
+        mbcx.report_opaque_type_errors(deferred_opaque_type_errors);
     } else {
         mbcx.report_region_errors(nll_errors);
     }
diff --git a/compiler/rustc_borrowck/src/nll.rs b/compiler/rustc_borrowck/src/nll.rs
index 8608a8a3a66..1517a683531 100644
--- a/compiler/rustc_borrowck/src/nll.rs
+++ b/compiler/rustc_borrowck/src/nll.rs
@@ -73,6 +73,38 @@ pub(crate) fn replace_regions_in_mir<'tcx>(
     universal_regions
 }
 
+/// Computes the closure requirements given the current inference state.
+///
+/// This is intended to be used before [BorrowCheckRootCtxt::handle_opaque_type_uses]
+/// because applying member constraints may rely on closure requirements.
+/// This is frequently the case for async functions, where pretty much everything
+/// happens inside of the inner async block but the opaque only gets constrained
+/// in the parent function.
+pub(crate) fn compute_closure_requirements_modulo_opaques<'tcx>(
+    infcx: &BorrowckInferCtxt<'tcx>,
+    body: &Body<'tcx>,
+    location_map: Rc<DenseLocationMap>,
+    universal_region_relations: &Frozen<UniversalRegionRelations<'tcx>>,
+    constraints: &MirTypeckRegionConstraints<'tcx>,
+) -> Option<ClosureRegionRequirements<'tcx>> {
+    // FIXME(#146079): we shouldn't have to clone all this stuff here.
+    // Computing the region graph should take at least some of it by reference/`Rc`.
+    let lowered_constraints = compute_sccs_applying_placeholder_outlives_constraints(
+        constraints.clone(),
+        &universal_region_relations,
+        infcx,
+    );
+    let mut regioncx = RegionInferenceContext::new(
+        &infcx,
+        lowered_constraints,
+        universal_region_relations.clone(),
+        location_map,
+    );
+
+    let (closure_region_requirements, _nll_errors) = regioncx.solve(infcx, body, None);
+    closure_region_requirements
+}
+
 /// Computes the (non-lexical) regions from the input MIR.
 ///
 /// This may result in errors being reported.
diff --git a/compiler/rustc_borrowck/src/region_infer/opaque_types/mod.rs b/compiler/rustc_borrowck/src/region_infer/opaque_types/mod.rs
index bee82e17835..72615cb33b3 100644
--- a/compiler/rustc_borrowck/src/region_infer/opaque_types/mod.rs
+++ b/compiler/rustc_borrowck/src/region_infer/opaque_types/mod.rs
@@ -3,16 +3,16 @@ use std::rc::Rc;
 
 use rustc_data_structures::frozen::Frozen;
 use rustc_data_structures::fx::FxIndexMap;
-use rustc_hir::def_id::DefId;
+use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_infer::infer::outlives::env::RegionBoundPairs;
 use rustc_infer::infer::{InferCtxt, NllRegionVariableOrigin, OpaqueTypeStorageEntries};
 use rustc_infer::traits::ObligationCause;
 use rustc_macros::extension;
-use rustc_middle::mir::{Body, ConstraintCategory};
+use rustc_middle::mir::{Body, ConcreteOpaqueTypes, ConstraintCategory};
 use rustc_middle::ty::{
-    self, DefiningScopeKind, FallibleTypeFolder, GenericArg, GenericArgsRef, OpaqueHiddenType,
-    OpaqueTypeKey, Region, RegionVid, Ty, TyCtxt, TypeFoldable, TypeSuperFoldable,
-    TypeVisitableExt, fold_regions,
+    self, DefiningScopeKind, EarlyBinder, FallibleTypeFolder, GenericArg, GenericArgsRef,
+    OpaqueHiddenType, OpaqueTypeKey, Region, RegionVid, Ty, TyCtxt, TypeFoldable,
+    TypeSuperFoldable, TypeVisitableExt, fold_regions,
 };
 use rustc_mir_dataflow::points::DenseLocationMap;
 use rustc_span::Span;
@@ -24,13 +24,13 @@ use rustc_trait_selection::traits::query::type_op::custom::CustomTypeOp;
 use tracing::{debug, instrument};
 
 use super::reverse_sccs::ReverseSccGraph;
+use crate::BorrowckInferCtxt;
 use crate::consumers::RegionInferenceContext;
 use crate::session_diagnostics::LifetimeMismatchOpaqueParam;
 use crate::type_check::canonical::fully_perform_op_raw;
 use crate::type_check::free_region_relations::UniversalRegionRelations;
 use crate::type_check::{Locations, MirTypeckRegionConstraints};
 use crate::universal_regions::{RegionClassification, UniversalRegions};
-use crate::{BorrowCheckRootCtxt, BorrowckInferCtxt};
 
 mod member_constraints;
 mod region_ctxt;
@@ -58,78 +58,32 @@ pub(crate) enum DeferredOpaqueTypeError<'tcx> {
     },
 }
 
-/// This looks at all uses of opaque types in their defining scope inside
-/// of this function.
+/// We eagerly map all regions to NLL vars here, as we need to make sure we've
+/// introduced NLL vars for all used placeholders.
 ///
-/// It first uses all defining uses to compute the actual concrete type of each
-/// opaque type definition.
-///
-/// We then apply this inferred type to actually check all uses of the opaque.
-pub(crate) fn handle_opaque_type_uses<'tcx>(
-    root_cx: &mut BorrowCheckRootCtxt<'tcx>,
+/// We need to resolve inference vars as even though we're in MIR typeck, we may still
+/// encounter inference variables, e.g. when checking user types.
+pub(crate) fn clone_and_resolve_opaque_types<'tcx>(
     infcx: &BorrowckInferCtxt<'tcx>,
-    body: &Body<'tcx>,
     universal_region_relations: &Frozen<UniversalRegionRelations<'tcx>>,
-    region_bound_pairs: &RegionBoundPairs<'tcx>,
-    known_type_outlives_obligations: &[ty::PolyTypeOutlivesPredicate<'tcx>],
-    location_map: &Rc<DenseLocationMap>,
     constraints: &mut MirTypeckRegionConstraints<'tcx>,
-) -> Vec<DeferredOpaqueTypeError<'tcx>> {
-    let tcx = infcx.tcx;
+) -> (OpaqueTypeStorageEntries, Vec<(OpaqueTypeKey<'tcx>, OpaqueHiddenType<'tcx>)>) {
     let opaque_types = infcx.clone_opaque_types();
-    if opaque_types.is_empty() {
-        return Vec::new();
-    }
-
-    // We need to eagerly map all regions to NLL vars here, as we need to make sure we've
-    // introduced nll vars for all used placeholders.
-    //
-    // We need to resolve inference vars as even though we're in MIR typeck, we may still
-    // encounter inference variables, e.g. when checking user types.
     let opaque_types_storage_num_entries = infcx.inner.borrow_mut().opaque_types().num_entries();
     let opaque_types = opaque_types
         .into_iter()
         .map(|entry| {
-            fold_regions(tcx, infcx.resolve_vars_if_possible(entry), |r, _| {
+            fold_regions(infcx.tcx, infcx.resolve_vars_if_possible(entry), |r, _| {
                 let vid = if let ty::RePlaceholder(placeholder) = r.kind() {
                     constraints.placeholder_region(infcx, placeholder).as_var()
                 } else {
                     universal_region_relations.universal_regions.to_region_vid(r)
                 };
-                Region::new_var(tcx, vid)
+                Region::new_var(infcx.tcx, vid)
             })
         })
         .collect::<Vec<_>>();
-
-    debug!(?opaque_types);
-
-    let errors = compute_concrete_opaque_types(
-        root_cx,
-        infcx,
-        constraints,
-        universal_region_relations,
-        Rc::clone(location_map),
-        &opaque_types,
-    );
-
-    if !errors.is_empty() {
-        return errors;
-    }
-
-    let errors = apply_computed_concrete_opaque_types(
-        root_cx,
-        infcx,
-        body,
-        &universal_region_relations.universal_regions,
-        region_bound_pairs,
-        known_type_outlives_obligations,
-        constraints,
-        &opaque_types,
-    );
-
-    detect_opaque_types_added_while_handling_opaque_types(infcx, opaque_types_storage_num_entries);
-
-    errors
+    (opaque_types_storage_num_entries, opaque_types)
 }
 
 /// Maps an NLL var to a deterministically chosen equal universal region.
@@ -172,6 +126,42 @@ fn nll_var_to_universal_region<'tcx>(
     }
 }
 
+/// Collect all defining uses of opaque types inside of this typeck root. This
+/// expects the hidden type to be mapped to the definition parameters of the opaque
+/// and errors if we end up with distinct hidden types.
+fn add_concrete_opaque_type<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    concrete_opaque_types: &mut ConcreteOpaqueTypes<'tcx>,
+    def_id: LocalDefId,
+    hidden_ty: OpaqueHiddenType<'tcx>,
+) {
+    // Sometimes two opaque types are the same only after we remap the generic parameters
+    // back to the opaque type definition. E.g. we may have `OpaqueType<X, Y>` mapped to
+    // `(X, Y)` and `OpaqueType<Y, X>` mapped to `(Y, X)`, and those are the same, but we
+    // only know that once we convert the generic parameters to those of the opaque type.
+    if let Some(prev) = concrete_opaque_types.0.get_mut(&def_id) {
+        if prev.ty != hidden_ty.ty {
+            let guar = hidden_ty.ty.error_reported().err().unwrap_or_else(|| {
+                let (Ok(e) | Err(e)) = prev.build_mismatch_error(&hidden_ty, tcx).map(|d| d.emit());
+                e
+            });
+            prev.ty = Ty::new_error(tcx, guar);
+        }
+        // Pick a better span if there is one.
+        // FIXME(oli-obk): collect multiple spans for better diagnostics down the road.
+        prev.span = prev.span.substitute_dummy(hidden_ty.span);
+    } else {
+        concrete_opaque_types.0.insert(def_id, hidden_ty);
+    }
+}
+
+fn get_concrete_opaque_type<'tcx>(
+    concrete_opaque_types: &ConcreteOpaqueTypes<'tcx>,
+    def_id: LocalDefId,
+) -> Option<EarlyBinder<'tcx, OpaqueHiddenType<'tcx>>> {
+    concrete_opaque_types.0.get(&def_id).map(|ty| EarlyBinder::bind(*ty))
+}
+
 #[derive(Debug)]
 struct DefiningUse<'tcx> {
     /// The opaque type using non NLL vars. This uses the actual
@@ -193,12 +183,12 @@ struct DefiningUse<'tcx> {
 ///
 /// It also means that this whole function is not really soundness critical as we
 /// recheck all uses of the opaques regardless.
-fn compute_concrete_opaque_types<'tcx>(
-    root_cx: &mut BorrowCheckRootCtxt<'tcx>,
+pub(crate) fn compute_concrete_opaque_types<'tcx>(
     infcx: &BorrowckInferCtxt<'tcx>,
-    constraints: &MirTypeckRegionConstraints<'tcx>,
     universal_region_relations: &Frozen<UniversalRegionRelations<'tcx>>,
+    constraints: &MirTypeckRegionConstraints<'tcx>,
     location_map: Rc<DenseLocationMap>,
+    concrete_opaque_types: &mut ConcreteOpaqueTypes<'tcx>,
     opaque_types: &[(OpaqueTypeKey<'tcx>, OpaqueHiddenType<'tcx>)],
 ) -> Vec<DeferredOpaqueTypeError<'tcx>> {
     let mut errors = Vec::new();
@@ -211,7 +201,8 @@ fn compute_concrete_opaque_types<'tcx>(
     // We start by checking each use of an opaque type during type check and
     // check whether the generic arguments of the opaque type are fully
     // universal, if so, it's a defining use.
-    let defining_uses = collect_defining_uses(root_cx, &mut rcx, opaque_types, &mut errors);
+    let defining_uses =
+        collect_defining_uses(&mut rcx, concrete_opaque_types, opaque_types, &mut errors);
 
     // We now compute and apply member constraints for all regions in the hidden
     // types of each defining use. This mutates the region values of the `rcx` which
@@ -221,14 +212,19 @@ fn compute_concrete_opaque_types<'tcx>(
     // After applying member constraints, we now check whether all member regions ended
     // up equal to one of their choice regions and compute the actual concrete type of
     // the opaque type definition. This is stored in the `root_cx`.
-    compute_concrete_types_from_defining_uses(root_cx, &rcx, &defining_uses, &mut errors);
+    compute_concrete_types_from_defining_uses(
+        &rcx,
+        concrete_opaque_types,
+        &defining_uses,
+        &mut errors,
+    );
     errors
 }
 
 #[instrument(level = "debug", skip_all, ret)]
 fn collect_defining_uses<'tcx>(
-    root_cx: &mut BorrowCheckRootCtxt<'tcx>,
     rcx: &mut RegionCtxt<'_, 'tcx>,
+    concrete_opaque_types: &mut ConcreteOpaqueTypes<'tcx>,
     opaque_types: &[(OpaqueTypeKey<'tcx>, OpaqueHiddenType<'tcx>)],
     errors: &mut Vec<DeferredOpaqueTypeError<'tcx>>,
 ) -> Vec<DefiningUse<'tcx>> {
@@ -248,7 +244,9 @@ fn collect_defining_uses<'tcx>(
             // with `TypingMode::Borrowck`.
             if infcx.tcx.use_typing_mode_borrowck() {
                 match err {
-                    NonDefiningUseReason::Tainted(guar) => root_cx.add_concrete_opaque_type(
+                    NonDefiningUseReason::Tainted(guar) => add_concrete_opaque_type(
+                        infcx.tcx,
+                        concrete_opaque_types,
                         opaque_type_key.def_id,
                         OpaqueHiddenType::new_error(infcx.tcx, guar),
                     ),
@@ -280,8 +278,8 @@ fn collect_defining_uses<'tcx>(
 }
 
 fn compute_concrete_types_from_defining_uses<'tcx>(
-    root_cx: &mut BorrowCheckRootCtxt<'tcx>,
     rcx: &RegionCtxt<'_, 'tcx>,
+    concrete_opaque_types: &mut ConcreteOpaqueTypes<'tcx>,
     defining_uses: &[DefiningUse<'tcx>],
     errors: &mut Vec<DeferredOpaqueTypeError<'tcx>>,
 ) {
@@ -360,7 +358,9 @@ fn compute_concrete_types_from_defining_uses<'tcx>(
                 },
             ));
         }
-        root_cx.add_concrete_opaque_type(
+        add_concrete_opaque_type(
+            tcx,
+            concrete_opaque_types,
             opaque_type_key.def_id,
             OpaqueHiddenType { span: hidden_type.span, ty },
         );
@@ -489,20 +489,20 @@ impl<'tcx> FallibleTypeFolder<TyCtxt<'tcx>> for ToArgRegionsFolder<'_, 'tcx> {
 ///
 /// It does this by equating the hidden type of each use with the instantiated final
 /// hidden type of the opaque.
-fn apply_computed_concrete_opaque_types<'tcx>(
-    root_cx: &mut BorrowCheckRootCtxt<'tcx>,
+pub(crate) fn apply_computed_concrete_opaque_types<'tcx>(
     infcx: &BorrowckInferCtxt<'tcx>,
     body: &Body<'tcx>,
     universal_regions: &UniversalRegions<'tcx>,
     region_bound_pairs: &RegionBoundPairs<'tcx>,
     known_type_outlives_obligations: &[ty::PolyTypeOutlivesPredicate<'tcx>],
     constraints: &mut MirTypeckRegionConstraints<'tcx>,
+    concrete_opaque_types: &mut ConcreteOpaqueTypes<'tcx>,
     opaque_types: &[(OpaqueTypeKey<'tcx>, OpaqueHiddenType<'tcx>)],
 ) -> Vec<DeferredOpaqueTypeError<'tcx>> {
     let tcx = infcx.tcx;
     let mut errors = Vec::new();
     for &(key, hidden_type) in opaque_types {
-        let Some(expected) = root_cx.get_concrete_opaque_type(key.def_id) else {
+        let Some(expected) = get_concrete_opaque_type(concrete_opaque_types, key.def_id) else {
             assert!(tcx.use_typing_mode_borrowck(), "non-defining use in defining scope");
             errors.push(DeferredOpaqueTypeError::NonDefiningUseInDefiningScope {
                 span: hidden_type.span,
@@ -512,7 +512,12 @@ fn apply_computed_concrete_opaque_types<'tcx>(
                 hidden_type.span,
                 "non-defining use in the defining scope with no defining uses",
             );
-            root_cx.add_concrete_opaque_type(key.def_id, OpaqueHiddenType::new_error(tcx, guar));
+            add_concrete_opaque_type(
+                tcx,
+                concrete_opaque_types,
+                key.def_id,
+                OpaqueHiddenType::new_error(tcx, guar),
+            );
             continue;
         };
 
@@ -552,7 +557,12 @@ fn apply_computed_concrete_opaque_types<'tcx>(
                 "equating opaque types",
             ),
         ) {
-            root_cx.add_concrete_opaque_type(key.def_id, OpaqueHiddenType::new_error(tcx, guar));
+            add_concrete_opaque_type(
+                tcx,
+                concrete_opaque_types,
+                key.def_id,
+                OpaqueHiddenType::new_error(tcx, guar),
+            );
         }
     }
     errors
@@ -565,7 +575,7 @@ fn apply_computed_concrete_opaque_types<'tcx>(
 /// an ICE we can properly handle this, but we haven't encountered any such test yet.
 ///
 /// See the related comment in `FnCtxt::detect_opaque_types_added_during_writeback`.
-fn detect_opaque_types_added_while_handling_opaque_types<'tcx>(
+pub(crate) fn detect_opaque_types_added_while_handling_opaque_types<'tcx>(
     infcx: &InferCtxt<'tcx>,
     opaque_types_storage_num_entries: OpaqueTypeStorageEntries,
 ) {
diff --git a/compiler/rustc_borrowck/src/region_infer/values.rs b/compiler/rustc_borrowck/src/region_infer/values.rs
index f1427218cdb..eb611fa3475 100644
--- a/compiler/rustc_borrowck/src/region_infer/values.rs
+++ b/compiler/rustc_borrowck/src/region_infer/values.rs
@@ -37,6 +37,7 @@ pub(crate) enum RegionElement {
 
 /// Records the CFG locations where each region is live. When we initially compute liveness, we use
 /// an interval matrix storing liveness ranges for each region-vid.
+#[derive(Clone)] // FIXME(#146079)
 pub(crate) struct LivenessValues {
     /// The map from locations to points.
     location_map: Rc<DenseLocationMap>,
@@ -194,6 +195,7 @@ impl LivenessValues {
 /// rustc to the internal `PlaceholderIndex` values that are used in
 /// NLL.
 #[derive(Debug, Default)]
+#[derive(Clone)] // FIXME(#146079)
 pub(crate) struct PlaceholderIndices {
     indices: FxIndexSet<ty::PlaceholderRegion>,
 }
diff --git a/compiler/rustc_borrowck/src/root_cx.rs b/compiler/rustc_borrowck/src/root_cx.rs
index 4e90ae391bb..cd4e9683f2d 100644
--- a/compiler/rustc_borrowck/src/root_cx.rs
+++ b/compiler/rustc_borrowck/src/root_cx.rs
@@ -1,13 +1,26 @@
+use std::mem;
+use std::rc::Rc;
+
 use rustc_abi::FieldIdx;
-use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
 use rustc_hir::def_id::LocalDefId;
-use rustc_middle::bug;
-use rustc_middle::ty::{EarlyBinder, OpaqueHiddenType, Ty, TyCtxt, TypeVisitableExt};
+use rustc_middle::mir::ConstraintCategory;
+use rustc_middle::ty::{self, TyCtxt};
 use rustc_span::ErrorGuaranteed;
 use smallvec::SmallVec;
 
 use crate::consumers::BorrowckConsumer;
-use crate::{ClosureRegionRequirements, ConcreteOpaqueTypes, PropagatedBorrowCheckResults};
+use crate::nll::compute_closure_requirements_modulo_opaques;
+use crate::region_infer::opaque_types::{
+    apply_computed_concrete_opaque_types, clone_and_resolve_opaque_types,
+    compute_concrete_opaque_types, detect_opaque_types_added_while_handling_opaque_types,
+};
+use crate::type_check::{Locations, constraint_conversion};
+use crate::{
+    ClosureRegionRequirements, CollectRegionConstraintsResult, ConcreteOpaqueTypes,
+    PropagatedBorrowCheckResults, borrowck_check_region_constraints,
+    borrowck_collect_region_constraints,
+};
 
 /// The shared context used by both the root as well as all its nested
 /// items.
@@ -15,7 +28,12 @@ pub(super) struct BorrowCheckRootCtxt<'tcx> {
     pub tcx: TyCtxt<'tcx>,
     root_def_id: LocalDefId,
     concrete_opaque_types: ConcreteOpaqueTypes<'tcx>,
-    nested_bodies: FxHashMap<LocalDefId, PropagatedBorrowCheckResults<'tcx>>,
+    /// The region constraints computed by [borrowck_collect_region_constraints]. This uses
+    /// an [FxIndexMap] to guarantee that iterating over it visits nested bodies before
+    /// their parents.
+    collect_region_constraints_results:
+        FxIndexMap<LocalDefId, CollectRegionConstraintsResult<'tcx>>,
+    propagated_borrowck_results: FxHashMap<LocalDefId, PropagatedBorrowCheckResults<'tcx>>,
     tainted_by_errors: Option<ErrorGuaranteed>,
     /// This should be `None` during normal compilation. See [`crate::consumers`] for more
     /// information on how this is used.
@@ -32,7 +50,8 @@ impl<'tcx> BorrowCheckRootCtxt<'tcx> {
             tcx,
             root_def_id,
             concrete_opaque_types: Default::default(),
-            nested_bodies: Default::default(),
+            collect_region_constraints_results: Default::default(),
+            propagated_borrowck_results: Default::default(),
             tainted_by_errors: None,
             consumer,
         }
@@ -42,83 +61,232 @@ impl<'tcx> BorrowCheckRootCtxt<'tcx> {
         self.root_def_id
     }
 
-    /// Collect all defining uses of opaque types inside of this typeck root. This
-    /// expects the hidden type to be mapped to the definition parameters of the opaque
-    /// and errors if we end up with distinct hidden types.
-    pub(super) fn add_concrete_opaque_type(
-        &mut self,
-        def_id: LocalDefId,
-        hidden_ty: OpaqueHiddenType<'tcx>,
-    ) {
-        // Sometimes two opaque types are the same only after we remap the generic parameters
-        // back to the opaque type definition. E.g. we may have `OpaqueType<X, Y>` mapped to
-        // `(X, Y)` and `OpaqueType<Y, X>` mapped to `(Y, X)`, and those are the same, but we
-        // only know that once we convert the generic parameters to those of the opaque type.
-        if let Some(prev) = self.concrete_opaque_types.0.get_mut(&def_id) {
-            if prev.ty != hidden_ty.ty {
-                let guar = hidden_ty.ty.error_reported().err().unwrap_or_else(|| {
-                    let (Ok(e) | Err(e)) =
-                        prev.build_mismatch_error(&hidden_ty, self.tcx).map(|d| d.emit());
-                    e
-                });
-                prev.ty = Ty::new_error(self.tcx, guar);
-            }
-            // Pick a better span if there is one.
-            // FIXME(oli-obk): collect multiple spans for better diagnostics down the road.
-            prev.span = prev.span.substitute_dummy(hidden_ty.span);
-        } else {
-            self.concrete_opaque_types.0.insert(def_id, hidden_ty);
-        }
+    pub(super) fn set_tainted_by_errors(&mut self, guar: ErrorGuaranteed) {
+        self.tainted_by_errors = Some(guar);
     }
 
-    pub(super) fn get_concrete_opaque_type(
+    pub(super) fn used_mut_upvars(
         &mut self,
-        def_id: LocalDefId,
-    ) -> Option<EarlyBinder<'tcx, OpaqueHiddenType<'tcx>>> {
-        self.concrete_opaque_types.0.get(&def_id).map(|ty| EarlyBinder::bind(*ty))
+        nested_body_def_id: LocalDefId,
+    ) -> &SmallVec<[FieldIdx; 8]> {
+        &self.propagated_borrowck_results[&nested_body_def_id].used_mut_upvars
     }
 
-    pub(super) fn set_tainted_by_errors(&mut self, guar: ErrorGuaranteed) {
-        self.tainted_by_errors = Some(guar);
+    pub(super) fn finalize(self) -> Result<&'tcx ConcreteOpaqueTypes<'tcx>, ErrorGuaranteed> {
+        if let Some(guar) = self.tainted_by_errors {
+            Err(guar)
+        } else {
+            Ok(self.tcx.arena.alloc(self.concrete_opaque_types))
+        }
     }
 
-    pub(super) fn get_or_insert_nested(
-        &mut self,
-        def_id: LocalDefId,
-    ) -> &PropagatedBorrowCheckResults<'tcx> {
-        debug_assert_eq!(
-            self.tcx.typeck_root_def_id(def_id.to_def_id()),
-            self.root_def_id.to_def_id()
-        );
-        if !self.nested_bodies.contains_key(&def_id) {
-            let result = super::do_mir_borrowck(self, def_id);
-            if let Some(prev) = self.nested_bodies.insert(def_id, result) {
-                bug!("unexpected previous nested body: {prev:?}");
+    fn handle_opaque_type_uses(&mut self) {
+        let mut per_body_info = Vec::new();
+        for input in self.collect_region_constraints_results.values_mut() {
+            let (num_entries, opaque_types) = clone_and_resolve_opaque_types(
+                &input.infcx,
+                &input.universal_region_relations,
+                &mut input.constraints,
+            );
+            input.deferred_opaque_type_errors = compute_concrete_opaque_types(
+                &input.infcx,
+                &input.universal_region_relations,
+                &input.constraints,
+                Rc::clone(&input.location_map),
+                &mut self.concrete_opaque_types,
+                &opaque_types,
+            );
+            per_body_info.push((num_entries, opaque_types));
+        }
+
+        for (input, (opaque_types_storage_num_entries, opaque_types)) in
+            self.collect_region_constraints_results.values_mut().zip(per_body_info)
+        {
+            if input.deferred_opaque_type_errors.is_empty() {
+                input.deferred_opaque_type_errors = apply_computed_concrete_opaque_types(
+                    &input.infcx,
+                    &input.body_owned,
+                    &input.universal_region_relations.universal_regions,
+                    &input.region_bound_pairs,
+                    &input.known_type_outlives_obligations,
+                    &mut input.constraints,
+                    &mut self.concrete_opaque_types,
+                    &opaque_types,
+                );
             }
+
+            detect_opaque_types_added_while_handling_opaque_types(
+                &input.infcx,
+                opaque_types_storage_num_entries,
+            )
         }
+    }
+
+    /// Computing defining uses of opaques may depend on the propagated region
+    /// requirements of nested bodies, while applying defining uses may introduce
+    /// additional region requirements we need to propagate.
+    ///
+    /// This results in a cyclic dependency. To compute the defining uses in parent
+    /// bodies, we need the closure requirements of its nested bodies, but to check
+    /// non-defining uses in nested bodies, we may rely on the defining uses in the
+    /// parent.
+    ///
+    /// We handle this issue by applying closure requirements twice. Once using the
+    /// region constraints from before we've handled opaque types in the nested body -
+    /// which is used by the parent to handle its defining uses - and once after.
+    ///
+    /// As a performance optimization, we also eagerly finish borrowck for bodies
+    /// which don't depend on opaque types. In this case they get removed from
+    /// `collect_region_constraints_results` and the final result gets put into
+    /// `propagated_borrowck_results`.
+    fn apply_closure_requirements_modulo_opaques(&mut self) {
+        let mut closure_requirements_modulo_opaques = FxHashMap::default();
+        // We need to `mem::take` both `self.collect_region_constraints_results` and
+        // `input.deferred_closure_requirements` as we otherwise can't iterate over
+        // them while mutably using the containing struct.
+        let collect_region_constraints_results =
+            mem::take(&mut self.collect_region_constraints_results);
+        // We iterate over all bodies here, visiting nested bodies before their parent.
+        for (def_id, mut input) in collect_region_constraints_results {
+            // A body depends on opaque types if it either has any opaque type uses itself,
+            // or it has a nested body which does.
+            //
+            // If the current body does not depend on any opaque types, we eagerly compute
+            // its final result and write it into `self.propagated_borrowck_results`. This
+            // avoids having to compute its closure requirements modulo opaques, as they
+            // are just the same as its final closure requirements.
+            let mut depends_on_opaques = input.infcx.has_opaque_types_in_storage();
 
-        self.nested_bodies.get(&def_id).unwrap()
+            // Iterate over all nested bodies of `input`. If that nested body depends on
+            // opaque types, we apply its closure requirements modulo opaques. Otherwise
+            // we use the closure requirements from its final borrowck result.
+            //
+            // In case we've only applied the closure requirements modulo opaques, we have
+            // to later apply its closure requirements considering opaques, so we put that
+            // nested body back into `deferred_closure_requirements`.
+            for (def_id, args, locations) in mem::take(&mut input.deferred_closure_requirements) {
+                let closure_requirements = match self.propagated_borrowck_results.get(&def_id) {
+                    None => {
+                        depends_on_opaques = true;
+                        input.deferred_closure_requirements.push((def_id, args, locations));
+                        &closure_requirements_modulo_opaques[&def_id]
+                    }
+                    Some(result) => &result.closure_requirements,
+                };
+
+                Self::apply_closure_requirements(
+                    &mut input,
+                    closure_requirements,
+                    def_id,
+                    args,
+                    locations,
+                );
+            }
+
+            // In case the current body does depend on opaques and is a nested body,
+            // we need to compute its closure requirements modulo opaques so that
+            // we're able to use it when visiting its parent later in this function.
+            //
+            // If the current body does not depend on opaque types, we finish borrowck
+            // and write its result into `propagated_borrowck_results`.
+            if depends_on_opaques {
+                if def_id != self.root_def_id {
+                    let req = Self::compute_closure_requirements_modulo_opaques(&input);
+                    closure_requirements_modulo_opaques.insert(def_id, req);
+                }
+                self.collect_region_constraints_results.insert(def_id, input);
+            } else {
+                assert!(input.deferred_closure_requirements.is_empty());
+                let result = borrowck_check_region_constraints(self, input);
+                self.propagated_borrowck_results.insert(def_id, result);
+            }
+        }
     }
 
-    pub(super) fn closure_requirements(
-        &mut self,
-        nested_body_def_id: LocalDefId,
-    ) -> &Option<ClosureRegionRequirements<'tcx>> {
-        &self.get_or_insert_nested(nested_body_def_id).closure_requirements
+    fn compute_closure_requirements_modulo_opaques(
+        input: &CollectRegionConstraintsResult<'tcx>,
+    ) -> Option<ClosureRegionRequirements<'tcx>> {
+        compute_closure_requirements_modulo_opaques(
+            &input.infcx,
+            &input.body_owned,
+            Rc::clone(&input.location_map),
+            &input.universal_region_relations,
+            &input.constraints,
+        )
     }
 
-    pub(super) fn used_mut_upvars(
-        &mut self,
-        nested_body_def_id: LocalDefId,
-    ) -> &SmallVec<[FieldIdx; 8]> {
-        &self.get_or_insert_nested(nested_body_def_id).used_mut_upvars
+    fn apply_closure_requirements(
+        input: &mut CollectRegionConstraintsResult<'tcx>,
+        closure_requirements: &Option<ClosureRegionRequirements<'tcx>>,
+        closure_def_id: LocalDefId,
+        args: ty::GenericArgsRef<'tcx>,
+        locations: Locations,
+    ) {
+        if let Some(closure_requirements) = closure_requirements {
+            constraint_conversion::ConstraintConversion::new(
+                &input.infcx,
+                &input.universal_region_relations.universal_regions,
+                &input.region_bound_pairs,
+                &input.known_type_outlives_obligations,
+                locations,
+                input.body_owned.span,      // irrelevant; will be overridden.
+                ConstraintCategory::Boring, // same as above.
+                &mut input.constraints,
+            )
+            .apply_closure_requirements(closure_requirements, closure_def_id, args);
+        }
     }
 
-    pub(super) fn finalize(self) -> Result<&'tcx ConcreteOpaqueTypes<'tcx>, ErrorGuaranteed> {
-        if let Some(guar) = self.tainted_by_errors {
-            Err(guar)
-        } else {
-            Ok(self.tcx.arena.alloc(self.concrete_opaque_types))
+    pub(super) fn do_mir_borrowck(&mut self) {
+        // The list of all bodies we need to borrowck. This first looks at
+        // nested bodies, and then their parents. This means accessing e.g.
+        // `used_mut_upvars` for a closure can assume that we've already
+        // checked that closure.
+        let all_bodies = self
+            .tcx
+            .nested_bodies_within(self.root_def_id)
+            .iter()
+            .chain(std::iter::once(self.root_def_id));
+        for def_id in all_bodies {
+            let result = borrowck_collect_region_constraints(self, def_id);
+            self.collect_region_constraints_results.insert(def_id, result);
+        }
+
+        // We now apply the closure requirements of nested bodies modulo
+        // opaques. In case a body does not depend on opaque types, we
+        // eagerly check its region constraints and use the final closure
+        // requirements.
+        //
+        // We eagerly finish borrowck for bodies which don't depend on
+        // opaques.
+        self.apply_closure_requirements_modulo_opaques();
+
+        // We handle opaque type uses for all bodies together.
+        self.handle_opaque_type_uses();
+
+        // Now walk over all bodies which depend on opaque types and finish borrowck.
+        //
+        // We first apply the final closure requirements from nested bodies which also
+        // depend on opaque types and then finish borrow checking the parent. Bodies
+        // which don't depend on opaques have already been fully borrowchecked in
+        // `apply_closure_requirements_modulo_opaques` as an optimization.
+        for (def_id, mut input) in mem::take(&mut self.collect_region_constraints_results) {
+            for (def_id, args, locations) in mem::take(&mut input.deferred_closure_requirements) {
+                // We visit nested bodies before their parent, so we're already
+                // done with nested bodies at this point.
+                let closure_requirements =
+                    &self.propagated_borrowck_results[&def_id].closure_requirements;
+                Self::apply_closure_requirements(
+                    &mut input,
+                    closure_requirements,
+                    def_id,
+                    args,
+                    locations,
+                );
+            }
+
+            let result = borrowck_check_region_constraints(self, input);
+            self.propagated_borrowck_results.insert(def_id, result);
         }
     }
 }
diff --git a/compiler/rustc_borrowck/src/type_check/free_region_relations.rs b/compiler/rustc_borrowck/src/type_check/free_region_relations.rs
index 7bf2df91470..d27a73535ba 100644
--- a/compiler/rustc_borrowck/src/type_check/free_region_relations.rs
+++ b/compiler/rustc_borrowck/src/type_check/free_region_relations.rs
@@ -19,6 +19,7 @@ use crate::type_check::{Locations, MirTypeckRegionConstraints, constraint_conver
 use crate::universal_regions::UniversalRegions;
 
 #[derive(Debug)]
+#[derive(Clone)] // FIXME(#146079)
 pub(crate) struct UniversalRegionRelations<'tcx> {
     pub(crate) universal_regions: UniversalRegions<'tcx>,
 
diff --git a/compiler/rustc_borrowck/src/type_check/mod.rs b/compiler/rustc_borrowck/src/type_check/mod.rs
index 0d363935f14..02be78f90b0 100644
--- a/compiler/rustc_borrowck/src/type_check/mod.rs
+++ b/compiler/rustc_borrowck/src/type_check/mod.rs
@@ -49,7 +49,7 @@ use crate::region_infer::values::{LivenessValues, PlaceholderIndex, PlaceholderI
 use crate::session_diagnostics::{MoveUnsized, SimdIntrinsicArgConst};
 use crate::type_check::free_region_relations::{CreateResult, UniversalRegionRelations};
 use crate::universal_regions::{DefiningTy, UniversalRegions};
-use crate::{BorrowCheckRootCtxt, BorrowckInferCtxt, path_utils};
+use crate::{BorrowCheckRootCtxt, BorrowckInferCtxt, DeferredClosureRequirements, path_utils};
 
 macro_rules! span_mirbug {
     ($context:expr, $elem:expr, $($message:tt)*) => ({
@@ -67,7 +67,7 @@ macro_rules! span_mirbug {
 }
 
 pub(crate) mod canonical;
-mod constraint_conversion;
+pub(crate) mod constraint_conversion;
 pub(crate) mod free_region_relations;
 mod input_output;
 pub(crate) mod liveness;
@@ -142,6 +142,7 @@ pub(crate) fn type_check<'tcx>(
         None
     };
 
+    let mut deferred_closure_requirements = Default::default();
     let mut typeck = TypeChecker {
         root_cx,
         infcx,
@@ -157,6 +158,7 @@ pub(crate) fn type_check<'tcx>(
         polonius_facts,
         borrow_set,
         constraints: &mut constraints,
+        deferred_closure_requirements: &mut deferred_closure_requirements,
         polonius_liveness,
     };
 
@@ -191,6 +193,7 @@ pub(crate) fn type_check<'tcx>(
         universal_region_relations,
         region_bound_pairs,
         known_type_outlives_obligations,
+        deferred_closure_requirements,
         polonius_context,
     }
 }
@@ -230,6 +233,7 @@ struct TypeChecker<'a, 'tcx> {
     polonius_facts: &'a mut Option<PoloniusFacts>,
     borrow_set: &'a BorrowSet<'tcx>,
     constraints: &'a mut MirTypeckRegionConstraints<'tcx>,
+    deferred_closure_requirements: &'a mut DeferredClosureRequirements<'tcx>,
     /// When using `-Zpolonius=next`, the liveness helper data used to create polonius constraints.
     polonius_liveness: Option<PoloniusLivenessContext>,
 }
@@ -241,11 +245,13 @@ pub(crate) struct MirTypeckResults<'tcx> {
     pub(crate) universal_region_relations: Frozen<UniversalRegionRelations<'tcx>>,
     pub(crate) region_bound_pairs: Frozen<RegionBoundPairs<'tcx>>,
     pub(crate) known_type_outlives_obligations: Frozen<Vec<ty::PolyTypeOutlivesPredicate<'tcx>>>,
+    pub(crate) deferred_closure_requirements: DeferredClosureRequirements<'tcx>,
     pub(crate) polonius_context: Option<PoloniusContext>,
 }
 
 /// A collection of region constraints that must be satisfied for the
 /// program to be considered well-typed.
+#[derive(Clone)] // FIXME(#146079)
 pub(crate) struct MirTypeckRegionConstraints<'tcx> {
     /// Maps from a `ty::Placeholder` to the corresponding
     /// `PlaceholderIndex` bit that we will use for it.
@@ -2470,21 +2476,11 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> {
         locations: Locations,
     ) -> ty::InstantiatedPredicates<'tcx> {
         let root_def_id = self.root_cx.root_def_id();
-        if let Some(closure_requirements) = &self.root_cx.closure_requirements(def_id) {
-            constraint_conversion::ConstraintConversion::new(
-                self.infcx,
-                self.universal_regions,
-                self.region_bound_pairs,
-                self.known_type_outlives_obligations,
-                locations,
-                self.body.span,             // irrelevant; will be overridden.
-                ConstraintCategory::Boring, // same as above.
-                self.constraints,
-            )
-            .apply_closure_requirements(closure_requirements, def_id, args);
-        }
+        // We will have to handle propagated closure requirements for this closure,
+        // but need to defer this until the nested body has been fully borrow checked.
+        self.deferred_closure_requirements.push((def_id, args, locations));
 
-        // Now equate closure args to regions inherited from `root_def_id`. Fixes #98589.
+        // Equate closure args to regions inherited from `root_def_id`. Fixes #98589.
         let typeck_root_args = ty::GenericArgs::identity_for_item(tcx, root_def_id);
 
         let parent_args = match tcx.def_kind(def_id) {
diff --git a/compiler/rustc_borrowck/src/universal_regions.rs b/compiler/rustc_borrowck/src/universal_regions.rs
index 296a2735533..64a7b408434 100644
--- a/compiler/rustc_borrowck/src/universal_regions.rs
+++ b/compiler/rustc_borrowck/src/universal_regions.rs
@@ -40,6 +40,7 @@ use crate::BorrowckInferCtxt;
 use crate::renumber::RegionCtxt;
 
 #[derive(Debug)]
+#[derive(Clone)] // FIXME(#146079)
 pub(crate) struct UniversalRegions<'tcx> {
     indices: UniversalRegionIndices<'tcx>,
 
@@ -200,6 +201,7 @@ impl<'tcx> DefiningTy<'tcx> {
 }
 
 #[derive(Debug)]
+#[derive(Clone)] // FIXME(#146079)
 struct UniversalRegionIndices<'tcx> {
     /// For those regions that may appear in the parameter environment
     /// ('static and early-bound regions), we maintain a map from the
diff --git a/compiler/rustc_data_structures/src/frozen.rs b/compiler/rustc_data_structures/src/frozen.rs
index 73190574667..4a60d17de2a 100644
--- a/compiler/rustc_data_structures/src/frozen.rs
+++ b/compiler/rustc_data_structures/src/frozen.rs
@@ -46,7 +46,7 @@
 //!    Frozen::freeze(new_bar)`).
 
 /// An owned immutable value.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Frozen<T>(T);
 
 impl<T> Frozen<T> {
diff --git a/compiler/rustc_infer/src/infer/mod.rs b/compiler/rustc_infer/src/infer/mod.rs
index d1507f08c06..d105d24bed7 100644
--- a/compiler/rustc_infer/src/infer/mod.rs
+++ b/compiler/rustc_infer/src/infer/mod.rs
@@ -989,6 +989,10 @@ impl<'tcx> InferCtxt<'tcx> {
         storage.var_infos.clone()
     }
 
+    pub fn has_opaque_types_in_storage(&self) -> bool {
+        !self.inner.borrow().opaque_type_storage.is_empty()
+    }
+
     #[instrument(level = "debug", skip(self), ret)]
     pub fn take_opaque_types(&self) -> Vec<(OpaqueTypeKey<'tcx>, OpaqueHiddenType<'tcx>)> {
         self.inner.borrow_mut().opaque_type_storage.take_opaque_types().collect()
diff --git a/compiler/rustc_middle/src/query/mod.rs b/compiler/rustc_middle/src/query/mod.rs
index 7bd8a0525a2..8ce70f75c67 100644
--- a/compiler/rustc_middle/src/query/mod.rs
+++ b/compiler/rustc_middle/src/query/mod.rs
@@ -450,6 +450,8 @@ rustc_queries! {
         }
     }
 
+    /// A list of all bodies inside of `key`. Nested bodies are always stored
+    /// before their parent.
     query nested_bodies_within(
         key: LocalDefId
     ) -> &'tcx ty::List<LocalDefId> {
diff --git a/compiler/rustc_monomorphize/src/collector.rs b/compiler/rustc_monomorphize/src/collector.rs
index 6a836442c32..cffeb6f9807 100644
--- a/compiler/rustc_monomorphize/src/collector.rs
+++ b/compiler/rustc_monomorphize/src/collector.rs
@@ -1535,7 +1535,20 @@ impl<'v> RootCollector<'_, 'v> {
     fn process_nested_body(&mut self, def_id: LocalDefId) {
         match self.tcx.def_kind(def_id) {
             DefKind::Closure => {
-                if self.strategy == MonoItemCollectionStrategy::Eager
+                // For `pub async fn foo(..)`, also try to monomorphize `foo::{closure}`.
+                let is_pub_fn_coroutine =
+                    match *self.tcx.type_of(def_id).instantiate_identity().kind() {
+                        ty::Coroutine(cor_id, _args) => {
+                            let tcx = self.tcx;
+                            let parent_id = tcx.parent(cor_id);
+                            tcx.def_kind(parent_id) == DefKind::Fn
+                                && tcx.asyncness(parent_id).is_async()
+                                && tcx.visibility(parent_id).is_public()
+                        }
+                        ty::Closure(..) | ty::CoroutineClosure(..) => false,
+                        _ => unreachable!(),
+                    };
+                if (self.strategy == MonoItemCollectionStrategy::Eager || is_pub_fn_coroutine)
                     && !self
                         .tcx
                         .generics_of(self.tcx.typeck_root_def_id(def_id.to_def_id()))
diff --git a/compiler/rustc_ty_utils/src/nested_bodies.rs b/compiler/rustc_ty_utils/src/nested_bodies.rs
index 7c74d8eb635..11dfbad7dbb 100644
--- a/compiler/rustc_ty_utils/src/nested_bodies.rs
+++ b/compiler/rustc_ty_utils/src/nested_bodies.rs
@@ -22,9 +22,11 @@ impl<'tcx> Visitor<'tcx> for NestedBodiesVisitor<'tcx> {
     fn visit_nested_body(&mut self, id: hir::BodyId) {
         let body_def_id = self.tcx.hir_body_owner_def_id(id);
         if self.tcx.typeck_root_def_id(body_def_id.to_def_id()) == self.root_def_id {
-            self.nested_bodies.push(body_def_id);
+            // We visit nested bodies before adding the current body. This
+            // means that nested bodies are always stored before their parent.
             let body = self.tcx.hir_body(id);
             self.visit_body(body);
+            self.nested_bodies.push(body_def_id);
         }
     }
 }
diff --git a/src/tools/miri/.github/workflows/ci.yml b/src/tools/miri/.github/workflows/ci.yml
index 7d79c384f85..c0fed96d4e6 100644
--- a/src/tools/miri/.github/workflows/ci.yml
+++ b/src/tools/miri/.github/workflows/ci.yml
@@ -41,6 +41,11 @@ jobs:
             multiarch: s390x
             gcc_cross: s390x-linux-gnu
             qemu: true
+          - host_target: powerpc64le-unknown-linux-gnu
+            os: ubuntu-latest
+            multiarch: ppc64el
+            gcc_cross: powerpc64le-linux-gnu
+            qemu: true
           - host_target: aarch64-apple-darwin
             os: macos-latest
           - host_target: i686-pc-windows-msvc
diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock
index b46f0f83420..4df17c83c7e 100644
--- a/src/tools/miri/Cargo.lock
+++ b/src/tools/miri/Cargo.lock
@@ -1569,9 +1569,9 @@ dependencies = [
 
 [[package]]
 name = "tracing-subscriber"
-version = "0.3.19"
+version = "0.3.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
+checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
 dependencies = [
  "sharded-slab",
  "thread_local",
diff --git a/src/tools/miri/cargo-miri/Cargo.lock b/src/tools/miri/cargo-miri/Cargo.lock
index b3f5dafab64..ea9c04a3cb5 100644
--- a/src/tools/miri/cargo-miri/Cargo.lock
+++ b/src/tools/miri/cargo-miri/Cargo.lock
@@ -429,9 +429,9 @@ dependencies = [
 
 [[package]]
 name = "rustc-build-sysroot"
-version = "0.5.9"
+version = "0.5.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdb13874a0e55baf4ac3d49d38206aecb31a55b75d6c4d04fd850b53942c8cc8"
+checksum = "dd41ead66a69880951b2f7df3139db401d44451b4da123344d27eaa791b89c95"
 dependencies = [
  "anyhow",
  "rustc_version",
diff --git a/src/tools/miri/cargo-miri/Cargo.toml b/src/tools/miri/cargo-miri/Cargo.toml
index 77cb1df8e74..64b56ea114e 100644
--- a/src/tools/miri/cargo-miri/Cargo.toml
+++ b/src/tools/miri/cargo-miri/Cargo.toml
@@ -18,7 +18,7 @@ directories = "6"
 rustc_version = "0.4"
 serde_json = "1.0.40"
 cargo_metadata = "0.21"
-rustc-build-sysroot = "0.5.8"
+rustc-build-sysroot = "0.5.10"
 
 # Enable some feature flags that dev-dependencies need but dependencies
 # do not.  This makes `./miri install` after `./miri build` faster.
diff --git a/src/tools/miri/doc/img/perfetto_aggregate_statistics.png b/src/tools/miri/doc/img/perfetto_aggregate_statistics.png
new file mode 100644
index 00000000000..d4fd3826f47
--- /dev/null
+++ b/src/tools/miri/doc/img/perfetto_aggregate_statistics.png
Binary files differdiff --git a/src/tools/miri/doc/img/perfetto_aggregate_statistics_sql.png b/src/tools/miri/doc/img/perfetto_aggregate_statistics_sql.png
new file mode 100644
index 00000000000..bda92d3885a
--- /dev/null
+++ b/src/tools/miri/doc/img/perfetto_aggregate_statistics_sql.png
Binary files differdiff --git a/src/tools/miri/doc/img/perfetto_span.png b/src/tools/miri/doc/img/perfetto_span.png
new file mode 100644
index 00000000000..1a7184f22ae
--- /dev/null
+++ b/src/tools/miri/doc/img/perfetto_span.png
Binary files differdiff --git a/src/tools/miri/doc/img/perfetto_subname_statistics.png b/src/tools/miri/doc/img/perfetto_subname_statistics.png
new file mode 100644
index 00000000000..8c86b07e925
--- /dev/null
+++ b/src/tools/miri/doc/img/perfetto_subname_statistics.png
Binary files differdiff --git a/src/tools/miri/doc/img/perfetto_timeline.png b/src/tools/miri/doc/img/perfetto_timeline.png
new file mode 100644
index 00000000000..49f8a1fac1d
--- /dev/null
+++ b/src/tools/miri/doc/img/perfetto_timeline.png
Binary files differdiff --git a/src/tools/miri/doc/img/perfetto_visualize_argument_values.png b/src/tools/miri/doc/img/perfetto_visualize_argument_values.png
new file mode 100644
index 00000000000..1dcbacaf9cb
--- /dev/null
+++ b/src/tools/miri/doc/img/perfetto_visualize_argument_values.png
Binary files differdiff --git a/src/tools/miri/doc/img/perfetto_visualize_argument_values_misbehaving.png b/src/tools/miri/doc/img/perfetto_visualize_argument_values_misbehaving.png
new file mode 100644
index 00000000000..beeba8a4a3a
--- /dev/null
+++ b/src/tools/miri/doc/img/perfetto_visualize_argument_values_misbehaving.png
Binary files differdiff --git a/src/tools/miri/doc/img/perfetto_visualize_argument_values_sql.png b/src/tools/miri/doc/img/perfetto_visualize_argument_values_sql.png
new file mode 100644
index 00000000000..c7b163b0a57
--- /dev/null
+++ b/src/tools/miri/doc/img/perfetto_visualize_argument_values_sql.png
Binary files differdiff --git a/src/tools/miri/doc/tracing.md b/src/tools/miri/doc/tracing.md
new file mode 100644
index 00000000000..d7114af947d
--- /dev/null
+++ b/src/tools/miri/doc/tracing.md
@@ -0,0 +1,292 @@
+# Documentation for the tracing infrastructure in Miri
+
+Miri can be traced to understand how much time is spent in its various components (e.g. borrow tracker, data race checker, etc.). When tracing is enabled, running Miri will create a `.json` trace file that can be opened and analyzed in [Perfetto](https://ui.perfetto.dev/). For any questions regarding this documentation you may contact [Stypox](https://rust-lang.zulipchat.com/#narrow/dm/627563-Stypox) on Zulip.
+
+## Obtaining a trace file
+
+### From the Miri codebase
+
+All of the tracing functionality in Miri is gated by the `"tracing"` feature flag to ensure it does not create any overhead when unneeded. To compile Miri with this feature enabled, you can pass `--features=tracing` to `./miri`. Then, to make running Miri actually produce a trace file, you also need to set the `MIRI_TRACING` environment variable. For example:
+
+```sh
+MIRI_TRACING=1 ./miri run --features=tracing ./tests/pass/hello.rs
+```
+
+### From the rustc codebase
+
+If you are building Miri from within the rustc tree, you need to enable the `"tracing"` feature by adding this line to `bootstrap.toml`:
+
+```toml
+build.tool.miri.features = ["tracing"]
+```
+
+And then you could run the following:
+
+```sh
+MIRI_TRACING=1 ./x.py run miri --stage 1 --args ./src/tools/miri/tests/pass/hello.rs
+```
+
+### The trace file
+
+After running Miri with tracing enabled you will get a `.json` trace file that contains a list of all events and spans that occurred throughout the execution. The file follows [this format](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview).
+
+## Analyzing a trace file
+
+To analyze traces you can use [Perfetto UI](https://ui.perfetto.dev/), a trace analyzer made by Google that was originally a part of the Chrome browser. Just open Perfetto and drag and drop the `.json` file there. Official documentation for the controls in the UI can be found [here](https://perfetto.dev/docs/visualization/perfetto-ui).
+
+### The timeline
+
+You will see the boxes "Global Legacy Events" and "Process 1" on the left of the workspace: after clicking on either of them their timeline will expand and you will be able to zoom in and look at individual spans (and events).
+
+- "Process 1" contains tracing spans for the various components of Miri, all in a single timeline line (e.g. borrow tracker, data race checker, etc.)
+- "Global Legacy Events" contains auxiliary spans on two separate lines that allow understanding what code is being executed at any point in time:
+    - "frame": what is the current stack frame in the interpreted program
+    - "step": what statement/terminator in the MIR of the interpreted program is being executed
+
+Spans are represented as colored boxes in the timeline, while instantaneous events are represented by tiny arrows. (Events exist because rustc and Miri also use the `tracing` crate for debug logging, and those logs turn into events in the trace.)
+
+![](./img/perfetto_timeline.png)
+
+### Span/event data
+
+You can click on a span or an event to get more information about it, including some arguments that were passed when the span/event was entered/fired. In the following screenshot you can see the details of a "layouting" span that was generated by the following line in Miri's code:
+
+```rust
+let _trace = enter_trace_span!(M, layouting::fn_abi_of_instance, ?instance, ?extra_args);
+```
+
+![](./img/perfetto_span.png)
+
+### SQL tables
+
+Perfetto supports querying the span/event database using SQL queries (see the [docs](https://perfetto.dev/docs/analysis/perfetto-sql-syntax)). Just type `:` in the search bar at the top to enter SQL mode, and then you will be able to enter SQL queries there. The relevant SQL tables are:
+- `slices`: contains all spans and events; events can be distinguished from spans since their `dur` is 0. Relevant columns are:
+    - `id`: a unique primary-key ID for the span (assigned by Perfetto, not present in the trace file)
+    - `ts` and `dur`: the beginning and duration of the span, in nanoseconds
+    - `name`: the name of the span
+    - `parent_id`: the parent span ID, or null if there is no parent (assigned by Perfetto based on the timing at which spans occur, i.e. whenever two spans are nested, the inner one is the child of the outer one)
+    - `arg_set_id`: a foreign key into the table of arguments (1-to-N)
+- `args`: contains all of the arguments of the various events/spans. Relevant columns are:
+    - `arg_set_id`: the key used to join the slices and args tables
+    - `key`: the name of the argument prepended with "args."
+    - `display_value`: the value of the argument
+
+Some useful queries are provided in the following sections.
+
+### Enhancing the timeline
+
+On the "Process 1" timeline line there are some spans with the same name that are actually generated from different places in Miri's code. In those cases the span name indicates the component that was invoked (e.g. the data race checker), but not the specific function that was run. To inspect the specific function, we store a "subname" in an argument with the same name as the span, which unfortunately can be seen only after clicking on the span.
+
+To make it quicker to look at subnames, you can add a new timeline line that specifically shows the subnames for spans with a specific name. To do so:
+1. select any span with the name you care about (call this name `$NAME`)
+2. click on the dropdown highlighted in blue next to the argument with name `$NAME` (or `args.$NAME`)
+3. click on "Visualize argument values"
+4. a new timeline line will appear with only spans originally named `$NAME`, but now with the subname displayed instead
+
+The following screenshot shows the 4 steps for spans named "data_race":
+
+![](./img/perfetto_visualize_argument_values.png)
+
+### Visualizing which "frame" or "step" is being executed
+
+Unfortunately the instructions in [Enhancing the timeline](#enhancing-the-timeline) only work well with spans under "Process 1", but misbehave with spans under "Global Legacy Events" (see the screenshot below). This might be a bug in Perfetto, but nevertheless a workaround is available:
+
+1. click on the search bar at the top and write `:` to enter SQL mode
+2. copy-paste the following SQL, replace "SPAN_NAME" at the end with either "frame" or "step" (i.e. one of the two span names under "Global Legacy Events"), and press Enter to execute it:
+    ```sql
+    select slices.id, ts, dur, track_id, category, args.string_value as name, depth, stack_id, parent_stack_id, parent_id, slices.arg_set_id, thread_ts, thread_instruction_count, thread_instruction_delta, cat, slice_id
+    from slices inner join args using (arg_set_id)
+    where args.key = "args." || name and name = "SPAN_NAME"
+    ```
+3. at the top-right of the box at the bottom, click on "Show debug track"
+4. press on "Show" in the popup that just appeared
+5. a new debug track will appear with the names of steps or frames
+
+What the SQL does is to select only spans with the name "SPAN_NAME" and keep all of the span fields untouched, except for the name which is replaced with the subname. As explained in [Enhancing the timeline](#enhancing-the-timeline), remember that the subname is stored in an argument with the same name as the span.
+
+![](./img/perfetto_visualize_argument_values_sql.png)
+
+<img src="./img/perfetto_visualize_argument_values_misbehaving.png" width="300px">
+
+### Compute aggregate statistics
+
+The simplest way to get aggregate statistics about a time range is to:
+
+1. select a time range by drag-clicking along a trace line
+2. click on the "Current Selection" tab at the bottom if it's not already open
+3. see various tables/visualizations of how much time is spent in each span by clicking on "Slices", "Pivot Table" or "Slice Flamegraph"
+
+Note that the numbers shown in the "Slices" and "Pivot Table" tabs also include nested spans, so they cannot be used to compute statistics such as "X% of time is spent in spans named Y" because two spans named Y might be nested and their duration would be counted twice. For such statistics use the method in [Compute aggregate statistics (enhanced)](#compute-aggregate-statistics-enhanced).
+
+![](./img/perfetto_aggregate_statistics.png)
+
+### Compute aggregate statistics (enhanced)
+
+The following (long but not complicated) query can be used to find out how much time is spent in spans (grouped by their name). Only spans without a parent are considered towards the computations (see `where parent_id is null`): so for example if `validate_operand` in turn calls `layouting` (which generates a nested/child span), only the `validate_operand` statistics are increased. This query also excludes auxiliary spans (see `name != "frame" and name != "step"`).
+
+Note that this query does not allow selecting a time range, but that can be done by adding a condition, e.g. `ts + dur > MIN_T and ts < MAX_T` would match only spans that intersect the range `(MIN_T, MAX_T)`. Remember that the time unit is nanoseconds.
+
+```sql
+select "TOTAL PROGRAM DURATION" as name, count(*), max(ts + dur) as "sum(dur)", 100.0 as "%", null as "min(dur)", null as "max(dur)", null as "avg(dur)", null as "stddev(dur)"
+from slices
+
+union select "TOTAL OVER ALL SPANS (excluding events)" as name, count(*), sum(dur), cast(cast(sum(dur) as float) / (select max(ts + dur) from slices) * 1000 as int) / 10.0 as "%", min(dur), max(dur), cast(avg(dur) as int) as "avg(dur)", cast(sqrt(avg(dur*dur)-avg(dur)*avg(dur)) as int) as "stddev(dur)"
+from slices
+where parent_id is null and name != "frame" and name != "step" and dur > 0
+
+union select name, count(*), sum(dur), cast(cast(sum(dur) as float) / (select max(ts + dur) from slices) * 1000 as int) / 10.0 as "%", min(dur), max(dur), cast(avg(dur) as int) as "avg(dur)", cast(sqrt(avg(dur*dur)-avg(dur)*avg(dur)) as int) as "stddev(dur)"
+from slices
+where parent_id is null and name != "frame" and name != "step"
+group by name
+order by sum(dur) desc, count(*) desc
+```
+
+This is the kind of table you would get out:
+
+![](./img/perfetto_aggregate_statistics_sql.png)
+
+### Statistics about subnames of a span
+
+Use the following SQL to see statistics about the subnames of spans with the same name (replace "SPAN_NAME" with the name of the span you want to see subname statistics of):
+
+```sql
+select args.string_value as name, count(*), sum(dur), min(dur), max(dur), cast(avg(dur) as int) as "avg(dur)", cast(sqrt(avg(dur*dur)-avg(dur)*avg(dur)) as int) as "stddev(dur)"
+from slices inner join args using (arg_set_id)
+where args.key = "args." || name and name = "SPAN_NAME"
+group by args.string_value
+order by count(*) desc
+```
+
+For example, this is the table of how much time is spent in each borrow tracker function: 
+
+![](./img/perfetto_subname_statistics.png)
+
+### Finding long periods of time without any tracing
+
+The following SQL finds the longest periods of time during which no tracing span is active (ignoring the auxiliary "frame" and "step" spans), with the ability to click on IDs in the table of results to quickly reach the corresponding place. This can be useful to spot things that use up a significant amount of time but that are not yet covered by tracing calls.
+
+```sql
+with ordered as (
+    select s1.*, row_number() over (order by s1.ts) as rn
+    from slices as s1
+    where s1.parent_id is null and s1.dur > 0 and s1.name != "frame" and s1.name != "step"
+)
+select a.ts+a.dur as ts, b.ts-a.ts-a.dur as dur, a.id, a.track_id, a.category, a.depth, a.stack_id, a.parent_stack_id, a.parent_id, a.arg_set_id, a.thread_ts, a.thread_instruction_count, a.thread_instruction_delta, a.cat, a.slice_id, "empty" as name
+from ordered as a inner join ordered as b on a.rn=b.rn-1
+order by b.ts-a.ts-a.dur desc
+```
+
+### Saving Perfetto's state as a preset
+
+Unfortunately Perfetto does not seem to support saving the UI state as a preset that can be used to repeat the same analysis on multiple traces. You have to click through the various menus or run the various SQL queries every time to set up the UI as you want.
+
+## Adding new tracing calls to the code
+
+### The "tracing" feature
+
+Miri is highly interconnected with `rustc_const_eval`, and therefore collecting proper trace data about Miri also involves adding some tracing calls within `rustc_const_eval`'s codebase. As explained in [Obtaining a trace file](#obtaining-a-trace-file), tracing calls are disabled (and optimized out) when Miri's "tracing" feature is not enabled. However, while it is possible to check for the feature from Miri's codebase, it's not possible to do so from `rustc_const_eval` (since it's a separate crate, and it's even in a precompiled `.rlib` in case of out-of-tree builds).
+
+The solution to make it possible to check whether tracing is enabled at compile time even in `rustc_const_eval` was to add a function with this signature to the `Machine` trait:
+```rust
+fn enter_trace_span(span: impl FnOnce() -> tracing::Span) -> impl EnteredTraceSpan
+```
+
+where `EnteredTraceSpan` is just a marker trait implemented by `()` and `tracing::span::EnteredSpan`. This function returns `()` by default (without calling the `span` closure), except in `MiriMachine` where if tracing is enabled it will return `span().entered()`.
+
+The code in `rustc_const_eval` calls this function when it wants to do tracing, and the compiler will (hopefully) optimize out tracing calls when tracing is disabled.
+
+### The `enter_trace_span!()` macro
+
+To add tracing to a section of code in Miri or in `rustc_const_eval`, you can use the `enter_trace_span!()` macro, which takes care of the details explained in [The "tracing" feature](#the-tracing-feature).
+
+The `enter_trace_span!()` macro accepts the same syntax as `tracing::span!()` ([documentation](https://docs.rs/tracing/latest/tracing/#using-the-macros)) except for a few customizations, and returns an already entered trace span. The returned value is a drop guard that will exit the span when dropped, so **make sure to give it a proper scope** by storing it in a variable like this:
+
+```rust
+let _trace = enter_trace_span!("My span");
+```
+
+When calling this macro from `rustc_const_eval` you need to pass a type implementing the `Machine` trait as the first argument (since it will be used to call `Machine::enter_trace_span()`). This is usually available in various parts of `rustc_const_eval` under the name `M`, since most of `rustc_const_eval`'s code is `Machine`-agnostic.
+
+```rust
+let _trace = enter_trace_span!("My span");    // from Miri
+let _trace = enter_trace_span!(M, "My span"); // from rustc_const_eval
+```
+
+You can also make sense of the syntaxes explained below by looking at this Perfetto screenshot from [Span/event data](#spanevent-data).
+
+![](./img/perfetto_span.png)
+
+### Syntax accepted by `tracing::span!()`
+
+The full documentation for the `tracing::span!()` syntax can be found [here](https://docs.rs/tracing/latest/tracing/#using-the-macros) under "Using the Macros". A few possibly confusing syntaxes are listed here:
+```rust
+// logs a span named "hello" with a field named "arg" of value 42 (works only because
+// 42 implements the tracing::Value trait, otherwise use one of the options below)
+let _trace = enter_trace_span!(M, "hello", arg = 42);
+// logs a field called "my_display_var" using the Display implementation
+let _trace = enter_trace_span!(M, "hello", %my_display_var);
+// logs a field called "my_debug_var" using the Debug implementation
+let _trace = enter_trace_span!(M, "hello", ?my_debug_var);
+```
+
+### `NAME::SUBNAME` syntax
+
+In addition to the syntax accepted by `tracing::span!()`, the `enter_trace_span!()` macro optionally allows passing the span name (i.e. the first macro argument) in the form `NAME::SUBNAME` (without quotes) to indicate that the span has name "NAME" (usually the name of the component) and has an additional more specific name "SUBNAME" (usually the function name). The latter is passed to the tracing crate as a span field with the name "NAME". This allows not being distracted by subnames when looking at the trace in Perfetto, but when deeper introspection is needed within a component, it's still possible to view the subnames directly with a few steps (see [Enhancing the timeline](#enhancing-the-timeline)).
+```rust
+// for example, the first will expand to the second
+let _trace = enter_trace_span!(M, borrow_tracker::on_stack_pop);
+let _trace = enter_trace_span!(M, "borrow_tracker", borrow_tracker = "on_stack_pop");
+```
+
+### `tracing_separate_thread` parameter
+
+Miri saves traces using the `tracing_chrome` `tracing::Layer` so that they can be visualized in Perfetto. To instruct `tracing_chrome` to put some spans on a separate trace thread/line from other spans when viewed in Perfetto, you can pass `tracing_separate_thread = tracing::field::Empty` to the tracing macros. This is useful to separate out spans which just indicate the current step or program frame being processed by the interpreter. As explained in [The timeline](#the-timeline), those spans end up under the "Global Legacy Events" track. You should use a value of `tracing::field::Empty` so that other tracing layers (e.g. the logger) will ignore the `tracing_separate_thread` field. For example:
+```rust
+let _trace = enter_trace_span!(M, step::eval_statement, tracing_separate_thread = tracing::field::Empty);
+```
+
+### Executing something else when tracing is disabled
+
+The `EnteredTraceSpan` trait contains an `or_if_tracing_disabled()` function that you can use to e.g. log a line as an alternative to the tracing span for when tracing is disabled. For example:
+```rust
+let _trace = enter_trace_span!(M, step::eval_statement)
+    .or_if_tracing_disabled(|| tracing::info!("eval_statement"));
+```
+
+## Implementation details
+
+Here we explain how tracing is implemented internally.
+
+The events and spans generated throughout the codebase are collected by [the `tracing` crate](https://crates.io/crates/tracing), which then dispatches them to the code that writes to the trace file, but also to the logger if logging is enabled. 
+
+### Choice of tracing library
+
+The crate that was chosen for collecting traces is [tracing](https://crates.io/crates/tracing), since:
+- it is very well maintained
+- it supports various different trace formats through plug-and-play `Layer`s (in Miri we are using `tracing_chrome` to export traces for perfetto, see [The `tracing_chrome` layer](#the-tracing_chrome-layer))
+- spans and events are collected with not just their name, but also file, line, module, and any number of custom arguments
+- it was already used in Miri and rustc as a logging framework 
+
+One major drawback of the tracing crate is, however, its big overhead. Entering and exiting a span takes on the order of 100ns, and many of Miri's spans are shorter than that, so their measurements are completely off and the program execution time increases significantly. E.g. at the time of writing this documentation, enabling tracing makes Miri 5x slower. Note that this used to be even worse, see [Time measurements](#time-measurements).
+
+### The `tracing_chrome` layer
+
+Miri uses [tracing-chrome](https://github.com/thoren-d/tracing-chrome) as the `Layer` that collects spans and events from the tracing crate and saves them to a file that can be opened in Perfetto. Although the crate [is published](https://crates.io/crates/tracing-chrome) on crates.io, it was not possible to depend on it from Miri, because it would bring in a separate compilation of the `tracing` crate. This is because Miri does not directly depend on `tracing`, and instead uses rustc's version through rustc-private, and apparently cargo can't realize that the same library is being built again when rustc-private is involved.
+
+So the solution was to copy-paste [the only file](https://github.com/thoren-d/tracing-chrome/blob/develop/src/lib.rs) in tracing-chrome into Miri. Nevertheless, this gave us the possibility to make some changes to tracing-chrome, which you can read about in the documentation at the top of [the file](https://github.com/rust-lang/miri/blob/master/src/bin/log/tracing_chrome.rs) that was copied to Miri.
+
+### Time measurements
+
+tracing-chrome originally used `std::time::Instant` to measure time, however on some x86/x86_64 Linux systems it might be unbearably slow since the underlying system call (`clock_gettime`) would take ≈1.3µs. Read more [here](https://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/) about how the Linux kernel chooses the clock source.
+
+Therefore, on x86/x86_64 Linux systems with a CPU that has an invariant TSC counter, we read from that instead to measure time, which takes only ≈13ns. There are unfortunately a lot of caveats to this approach though, as explained [in the code](https://github.com/rust-lang/miri/blob/master/src/bin/log/tracing_chrome_instant.rs) and [in the PR](https://github.com/rust-lang/miri/pull/4524). The most impactful one is that every thread spawned in Miri that wants to trace something (which requires measuring time) needs to pin itself to a single CPU core (using `sched_setaffinity`).
+
+## Other useful stuff
+
+### Making a flamegraph
+
+After compiling Miri, you can run the following command to make a flamegraph using Linux' `perf`. It can be useful to spot functions that use up a significant amount of time but that are not yet covered by tracing calls.
+
+```sh
+perf record  --call-graph dwarf -F 999 ./miri/target/debug/miri --edition 2021 --sysroot ~/.cache/miri ./tests/pass/hashmap.rs && perf script | inferno-collapse-perf | inferno-flamegraph > flamegraph.svg
+```
diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 59adc572eaa..695959e2e68 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-f605b57042ffeb320d7ae44490113a827139b766
+e004014d1bf4c29928a0f0f9f7d0964d43606cbd
diff --git a/src/tools/miri/src/bin/log/mod.rs b/src/tools/miri/src/bin/log/mod.rs
index f3b2fdb5348..22f74dd46b5 100644
--- a/src/tools/miri/src/bin/log/mod.rs
+++ b/src/tools/miri/src/bin/log/mod.rs
@@ -1,2 +1,3 @@
 pub mod setup;
 mod tracing_chrome;
+mod tracing_chrome_instant;
diff --git a/src/tools/miri/src/bin/log/tracing_chrome.rs b/src/tools/miri/src/bin/log/tracing_chrome.rs
index 3379816550c..310887a13a5 100644
--- a/src/tools/miri/src/bin/log/tracing_chrome.rs
+++ b/src/tools/miri/src/bin/log/tracing_chrome.rs
@@ -7,12 +7,15 @@
 //! (`git log -- path/to/tracing_chrome.rs`), but in summary:
 //! - the file attributes were changed and `extern crate` was added at the top
 //! - if a tracing span has a field called "tracing_separate_thread", it will be given a separate
-//! span ID even in [TraceStyle::Threaded] mode, to make it appear on a separate line when viewing
-//! the trace in <https://ui.perfetto.dev>. This is the syntax to trigger this behavior:
+//!   span ID even in [TraceStyle::Threaded] mode, to make it appear on a separate line when viewing
+//!   the trace in <https://ui.perfetto.dev>. This is the syntax to trigger this behavior:
 //!   ```rust
 //!   tracing::info_span!("my_span", tracing_separate_thread = tracing::field::Empty, /* ... */)
 //!   ```
-//! - use i64 instead of u64 for the "id" in [ChromeLayer::get_root_id] to be compatible with Perfetto
+//! - use i64 instead of u64 for the "id" in [ChromeLayer::get_root_id] to be compatible with
+//!   Perfetto
+//! - use [ChromeLayer::with_elapsed_micros_subtracting_tracing] to make time measurements faster on
+//!   Linux x86/x86_64 and to subtract time spent tracing from the timestamps in the trace file
 //!
 //! Depending on the tracing-chrome crate from crates.io is unfortunately not possible, since it
 //! depends on `tracing_core` which conflicts with rustc_private's `tracing_core` (meaning it would
@@ -50,9 +53,22 @@ use std::{
     thread::JoinHandle,
 };
 
+use crate::log::tracing_chrome_instant::TracingChromeInstant;
+
+/// Contains thread-local data for threads that send tracing spans or events.
+struct ThreadData {
+    /// A unique ID for this thread, will populate "tid" field in the output trace file.
+    tid: usize,
+    /// A clone of [ChromeLayer::out] to avoid the expensive operation of accessing a mutex
+    /// every time. This is used to send [Message]s to the thread that saves trace data to file.
+    out: Sender<Message>,
+    /// The instant in time this thread was started. All events happening on this thread will be
+    /// saved to the trace file with a timestamp (the "ts" field) measured relative to this instant.
+    start: TracingChromeInstant,
+}
+
 thread_local! {
-    static OUT: RefCell<Option<Sender<Message>>> = const { RefCell::new(None) };
-    static TID: RefCell<Option<usize>> = const { RefCell::new(None) };
+    static THREAD_DATA: RefCell<Option<ThreadData>> = const { RefCell::new(None) };
 }
 
 type NameFn<S> = Box<dyn Fn(&EventOrSpan<'_, '_, S>) -> String + Send + Sync>;
@@ -64,7 +80,6 @@ where
     S: Subscriber + for<'span> LookupSpan<'span> + Send + Sync,
 {
     out: Arc<Mutex<Sender<Message>>>,
-    start: std::time::Instant,
     max_tid: AtomicUsize,
     include_args: bool,
     include_locations: bool,
@@ -323,7 +338,6 @@ where
 {
     fn new(mut builder: ChromeLayerBuilder<S>) -> (ChromeLayer<S>, FlushGuard) {
         let (tx, rx) = mpsc::channel();
-        OUT.with(|val| val.replace(Some(tx.clone())));
 
         let out_writer = builder
             .out_writer
@@ -443,7 +457,6 @@ where
         };
         let layer = ChromeLayer {
             out: Arc::new(Mutex::new(tx)),
-            start: std::time::Instant::now(),
             max_tid: AtomicUsize::new(0),
             name_fn: builder.name_fn.take(),
             cat_fn: builder.cat_fn.take(),
@@ -456,22 +469,7 @@ where
         (layer, guard)
     }
 
-    fn get_tid(&self) -> (usize, bool) {
-        TID.with(|value| {
-            let tid = *value.borrow();
-            match tid {
-                Some(tid) => (tid, false),
-                None => {
-                    let tid = self.max_tid.fetch_add(1, Ordering::SeqCst);
-                    value.replace(Some(tid));
-                    (tid, true)
-                }
-            }
-        })
-    }
-
-    fn get_callsite(&self, data: EventOrSpan<S>) -> Callsite {
-        let (tid, new_thread) = self.get_tid();
+    fn get_callsite(&self, data: EventOrSpan<S>, tid: usize) -> Callsite {
         let name = self.name_fn.as_ref().map(|name_fn| name_fn(&data));
         let target = self.cat_fn.as_ref().map(|cat_fn| cat_fn(&data));
         let meta = match data {
@@ -502,14 +500,6 @@ where
             (None, None)
         };
 
-        if new_thread {
-            let name = match std::thread::current().name() {
-                Some(name) => name.to_owned(),
-                None => tid.to_string(),
-            };
-            self.send_message(Message::NewThread(tid, name));
-        }
-
         Callsite {
             tid,
             name,
@@ -548,31 +538,55 @@ where
         }
     }
 
-    fn enter_span(&self, span: SpanRef<S>, ts: f64) {
-        let callsite = self.get_callsite(EventOrSpan::Span(&span));
+    fn enter_span(&self, span: SpanRef<S>, ts: f64, tid: usize, out: &Sender<Message>) {
+        let callsite = self.get_callsite(EventOrSpan::Span(&span), tid);
         let root_id = self.get_root_id(span);
-        self.send_message(Message::Enter(ts, callsite, root_id));
+        let _ignored = out.send(Message::Enter(ts, callsite, root_id));
     }
 
-    fn exit_span(&self, span: SpanRef<S>, ts: f64) {
-        let callsite = self.get_callsite(EventOrSpan::Span(&span));
+    fn exit_span(&self, span: SpanRef<S>, ts: f64, tid: usize, out: &Sender<Message>) {
+        let callsite = self.get_callsite(EventOrSpan::Span(&span), tid);
         let root_id = self.get_root_id(span);
-        self.send_message(Message::Exit(ts, callsite, root_id));
+        let _ignored = out.send(Message::Exit(ts, callsite, root_id));
     }
 
-    fn get_ts(&self) -> f64 {
-        self.start.elapsed().as_nanos() as f64 / 1000.0
-    }
+    /// Helper function that measures how much time is spent while executing `f` and accounts for it
+    /// in subsequent calls, with the aim to reduce biases in the data collected by `tracing_chrome`
+    /// by subtracting the time spent inside tracing functions from the timeline. This makes it so
+    /// that the time spent inside the `tracing_chrome` functions does not impact the timestamps
+    /// inside the trace file (i.e. `ts`), even if such functions are slow (e.g. because they need
+    /// to format arguments on the same thread those arguments are collected on, otherwise memory
+    /// safety would be broken).
+    ///
+    /// `f` is called with the microseconds elapsed since the current thread was started (**not**
+    /// since the program start!), with the current thread ID (i.e. `tid`), and with a [Sender] that
+    /// can be used to send a [Message] to the thread that collects [Message]s and saves them to the
+    /// trace file.
+    #[inline(always)]
+    fn with_elapsed_micros_subtracting_tracing(&self, f: impl Fn(f64, usize, &Sender<Message>)) {
+        THREAD_DATA.with(|value| {
+            let mut thread_data = value.borrow_mut();
+            let (ThreadData { tid, out, start }, new_thread) = match thread_data.as_mut() {
+                Some(thread_data) => (thread_data, false),
+                None => {
+                    let tid = self.max_tid.fetch_add(1, Ordering::SeqCst);
+                    let out = self.out.lock().unwrap().clone();
+                    let start = TracingChromeInstant::setup_for_thread_and_start(tid);
+                    *thread_data = Some(ThreadData { tid, out, start });
+                    (thread_data.as_mut().unwrap(), true)
+                }
+            };
 
-    fn send_message(&self, message: Message) {
-        OUT.with(move |val| {
-            if val.borrow().is_some() {
-                let _ignored = val.borrow().as_ref().unwrap().send(message);
-            } else {
-                let out = self.out.lock().unwrap().clone();
-                let _ignored = out.send(message);
-                val.replace(Some(out));
-            }
+            start.with_elapsed_micros_subtracting_tracing(|ts| {
+                if new_thread {
+                    let name = match std::thread::current().name() {
+                        Some(name) => name.to_owned(),
+                        None => tid.to_string(),
+                    };
+                    let _ignored = out.send(Message::NewThread(*tid, name));
+                }
+                f(ts, *tid, out);
+            });
         });
     }
 }
@@ -586,52 +600,58 @@ where
             return;
         }
 
-        let ts = self.get_ts();
-        self.enter_span(ctx.span(id).expect("Span not found."), ts);
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            self.enter_span(ctx.span(id).expect("Span not found."), ts, tid, out);
+        });
     }
 
     fn on_record(&self, id: &span::Id, values: &span::Record<'_>, ctx: Context<'_, S>) {
         if self.include_args {
-            let span = ctx.span(id).unwrap();
-            let mut exts = span.extensions_mut();
+            self.with_elapsed_micros_subtracting_tracing(|_, _, _| {
+                let span = ctx.span(id).unwrap();
+                let mut exts = span.extensions_mut();
 
-            let args = exts.get_mut::<ArgsWrapper>();
+                let args = exts.get_mut::<ArgsWrapper>();
 
-            if let Some(args) = args {
-                let args = Arc::make_mut(&mut args.args);
-                values.record(&mut JsonVisitor { object: args });
-            }
+                if let Some(args) = args {
+                    let args = Arc::make_mut(&mut args.args);
+                    values.record(&mut JsonVisitor { object: args });
+                }
+            });
         }
     }
 
     fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
-        let ts = self.get_ts();
-        let callsite = self.get_callsite(EventOrSpan::Event(event));
-        self.send_message(Message::Event(ts, callsite));
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            let callsite = self.get_callsite(EventOrSpan::Event(event), tid);
+            let _ignored = out.send(Message::Event(ts, callsite));
+        });
     }
 
     fn on_exit(&self, id: &span::Id, ctx: Context<'_, S>) {
         if let TraceStyle::Async = self.trace_style {
             return;
         }
-        let ts = self.get_ts();
-        self.exit_span(ctx.span(id).expect("Span not found."), ts);
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            self.exit_span(ctx.span(id).expect("Span not found."), ts, tid, out);
+        });
     }
 
     fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) {
-        if self.include_args {
-            let mut args = Object::new();
-            attrs.record(&mut JsonVisitor { object: &mut args });
-            ctx.span(id).unwrap().extensions_mut().insert(ArgsWrapper {
-                args: Arc::new(args),
-            });
-        }
-        if let TraceStyle::Threaded = self.trace_style {
-            return;
-        }
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            if self.include_args {
+                let mut args = Object::new();
+                attrs.record(&mut JsonVisitor { object: &mut args });
+                ctx.span(id).unwrap().extensions_mut().insert(ArgsWrapper {
+                    args: Arc::new(args),
+                });
+            }
+            if let TraceStyle::Threaded = self.trace_style {
+                return;
+            }
 
-        let ts = self.get_ts();
-        self.enter_span(ctx.span(id).expect("Span not found."), ts);
+            self.enter_span(ctx.span(id).expect("Span not found."), ts, tid, out);
+        });
     }
 
     fn on_close(&self, id: span::Id, ctx: Context<'_, S>) {
@@ -639,8 +659,9 @@ where
             return;
         }
 
-        let ts = self.get_ts();
-        self.exit_span(ctx.span(&id).expect("Span not found."), ts);
+        self.with_elapsed_micros_subtracting_tracing(|ts, tid, out| {
+            self.exit_span(ctx.span(&id).expect("Span not found."), ts, tid, out);
+        });
     }
 }
 
diff --git a/src/tools/miri/src/bin/log/tracing_chrome_instant.rs b/src/tools/miri/src/bin/log/tracing_chrome_instant.rs
new file mode 100644
index 00000000000..f400bc20a7b
--- /dev/null
+++ b/src/tools/miri/src/bin/log/tracing_chrome_instant.rs
@@ -0,0 +1,183 @@
+//! Code in this module was in part inspired by
+//! <https://github.com/tikv/minstant/blob/27c9ec5ec90b5b67113a748a4defee0d2519518c/src/tsc_now.rs>.
+//! A useful resource is also
+//! <https://www.pingcap.com/blog/how-we-trace-a-kv-database-with-less-than-5-percent-performance-impact/>,
+//! although this file does not implement TSC synchronization but instead pins threads to CPUs,
+//! since the former is not reliable (i.e. it might lead to non-monotonic time measurements).
+//! Another useful resource for future improvements might be measureme's time measurement utils:
+//! <https://github.com/rust-lang/measureme/blob/master/measureme/src/counters.rs>.
+//! Documentation about how the Linux kernel chooses a clock source can be found here:
+//! <https://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/>.
+#![cfg(feature = "tracing")]
+
+/// This alternative `TracingChromeInstant` implementation was made entirely to suit the needs of
+/// [crate::log::tracing_chrome], and shouldn't be used for anything else. It features two functions:
+/// - [TracingChromeInstant::setup_for_thread_and_start], which sets up the current thread to do
+///   proper time tracking and returns a point in time to use as "t=0", and
+/// - [TracingChromeInstant::with_elapsed_micros_subtracting_tracing], which allows
+///   obtaining how much time elapsed since [TracingChromeInstant::setup_for_thread_and_start] was
+///   called while accounting for (and subtracting) the time spent inside tracing-related functions.
+///
+/// This measures time using [std::time::Instant], except for x86/x86_64 Linux machines, where
+/// [std::time::Instant] is too slow (~1.5us) and thus `rdtsc` is used instead (~5ns).
+pub enum TracingChromeInstant {
+    WallTime {
+        /// The time at which this instant was created, shifted forward to account
+        /// for time spent in tracing functions as explained in
+        /// [TracingChromeInstant::with_elapsed_micros_subtracting_tracing]'s comments.
+        start_instant: std::time::Instant,
+    },
+    #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
+    Tsc {
+        /// The value in the TSC counter when this instant was created, shifted forward to account
+        /// for time spent in tracing functions as explained in
+        /// [TracingChromeInstant::with_elapsed_micros_subtracting_tracing]'s comments.
+        start_tsc: u64,
+        /// The period of the TSC counter in microseconds.
+        tsc_to_microseconds: f64,
+    },
+}
+
+impl TracingChromeInstant {
+    /// Can be thought of as the same as [std::time::Instant::now()], but also does some setup to
+    /// make TSC stable in case TSC is available. This is supposed to be called (at most) once per
+    /// thread since the thread setup takes a few milliseconds.
+    ///
+    /// WARNING: If TSC is available, `incremental_thread_id` is used to pick to which CPU to pin
+    /// the current thread. Thread IDs should be assigned contiguously starting from 0. Be aware
+    /// that the current thread will be restricted to one CPU for the rest of the execution!
+    pub fn setup_for_thread_and_start(incremental_thread_id: usize) -> TracingChromeInstant {
+        #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
+        if *tsc::IS_TSC_AVAILABLE.get_or_init(tsc::is_tsc_available) {
+            // We need to lock this thread to a specific CPU, because CPUs' TSC timers might be out
+            // of sync.
+            tsc::set_cpu_affinity(incremental_thread_id);
+
+            // Can only use tsc_to_microseconds() and rdtsc() after having set the CPU affinity!
+            // We compute tsc_to_microseconds anew for every new thread just in case some CPU core
+            // has a different TSC frequency.
+            let tsc_to_microseconds = tsc::tsc_to_microseconds();
+            let start_tsc = tsc::rdtsc();
+            return TracingChromeInstant::Tsc { start_tsc, tsc_to_microseconds };
+        }
+
+        let _ = incremental_thread_id; // otherwise we get a warning when the TSC branch is disabled
+        TracingChromeInstant::WallTime { start_instant: std::time::Instant::now() }
+    }
+
+    /// Calls `f` with the time elapsed in microseconds since this [TracingChromeInstant] was built
+    /// by [TracingChromeInstant::setup_for_thread_and_start], while subtracting all time previously
+    /// spent executing other `f`s passed to this function. This behavior allows subtracting time
+    /// spent in functions that log tracing data (which `f` is supposed to be) from the tracing time
+    /// measurements.
+    ///
+    /// Note: microseconds are used as the time unit since that's what Chrome trace files should
+    /// contain, see the definition of the "ts" field in
+    /// <https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview>.
+    #[inline(always)]
+    pub fn with_elapsed_micros_subtracting_tracing(&mut self, f: impl Fn(f64)) {
+        match self {
+            TracingChromeInstant::WallTime { start_instant } => {
+                // Obtain the current time (before executing `f`).
+                let instant_before_f = std::time::Instant::now();
+
+                // Using the current time (`instant_before_f`) and the `start_instant` stored in
+                // `self`, calculate the elapsed time (in microseconds) since this instant was
+                // instantiated, accounting for any time that was previously spent executing `f`.
+                // The "accounting" part is not computed in this line, but is rather done by
+                // shifting forward the `start_instant` down below.
+                let ts = (instant_before_f - *start_instant).as_nanos() as f64 / 1000.0;
+
+                // Run the function (supposedly a function internal to the tracing infrastructure).
+                f(ts);
+
+                // Measure how much time was spent executing `f` and shift `start_instant` forward
+                // by that amount. This "removes" that time from the trace.
+                *start_instant += std::time::Instant::now() - instant_before_f;
+            }
+
+            #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
+            TracingChromeInstant::Tsc { start_tsc, tsc_to_microseconds } => {
+                // the comments above also apply here, since it's the same logic
+                let tsc_before_f = tsc::rdtsc();
+                let ts = ((tsc_before_f - *start_tsc) as f64) * (*tsc_to_microseconds);
+                f(ts);
+                *start_tsc += tsc::rdtsc() - tsc_before_f;
+            }
+        }
+    }
+}
+
+#[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
+mod tsc {
+
+    pub static IS_TSC_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
+
+    /// Reads the timestamp-counter register. Will give monotonic answers only when called from the
+    /// same thread, because the TSC of different CPUs might be out of sync.
+    #[inline(always)]
+    pub(super) fn rdtsc() -> u64 {
+        #[cfg(target_arch = "x86")]
+        use core::arch::x86::_rdtsc;
+        #[cfg(target_arch = "x86_64")]
+        use core::arch::x86_64::_rdtsc;
+
+        unsafe { _rdtsc() }
+    }
+
+    /// Estimates the TSC period (microseconds per tick) by waiting 10ms in a busy loop and
+    /// looking at how much the TSC increased in the meantime.
+    pub(super) fn tsc_to_microseconds() -> f64 {
+        const BUSY_WAIT: std::time::Duration = std::time::Duration::from_millis(10);
+        let tsc_start = rdtsc();
+        let instant_start = std::time::Instant::now();
+        while instant_start.elapsed() < BUSY_WAIT {
+            // `thread::sleep()` is not very precise at waking up the program at the right time,
+            // so use a busy loop instead.
+            core::hint::spin_loop();
+        }
+        let tsc_end = rdtsc();
+        (BUSY_WAIT.as_nanos() as f64) / 1000.0 / ((tsc_end - tsc_start) as f64)
+    }
+
+    /// Checks whether the TSC counter is available and runs at a constant rate independently
+    /// of CPU frequency even across different power states of the CPU (i.e. checks for the
+    /// `invariant_tsc` CPUID flag).
+    pub(super) fn is_tsc_available() -> bool {
+        #[cfg(target_arch = "x86")]
+        use core::arch::x86::__cpuid;
+        #[cfg(target_arch = "x86_64")]
+        use core::arch::x86_64::__cpuid;
+
+        // implemented like https://docs.rs/raw-cpuid/latest/src/raw_cpuid/extended.rs.html#965-967
+        const LEAF: u32 = 0x80000007; // this is the leaf for "advanced power management info"
+        let cpuid = unsafe { __cpuid(LEAF) };
+        (cpuid.edx & (1 << 8)) != 0 // EDX bit 8 indicates invariant TSC
+    }
+
+    /// Forces the current thread to run on a single CPU, which ensures the TSC counter is monotonic
+    /// (since TSCs of different CPUs might be out-of-sync). `incremental_thread_id` is used to pick
+    /// to which CPU to pin the current thread, and should be an incremental number that starts from
+    /// 0.
+    pub(super) fn set_cpu_affinity(incremental_thread_id: usize) {
+        let cpu_id = match std::thread::available_parallelism() {
+            Ok(available_parallelism) => incremental_thread_id % available_parallelism,
+            _ => panic!("Could not determine CPU count to properly set CPU affinity"),
+        };
+
+        let mut set = unsafe { std::mem::zeroed::<libc::cpu_set_t>() };
+        unsafe { libc::CPU_SET(cpu_id, &mut set) };
+
+        // Set the current thread's core affinity.
+        if unsafe {
+            libc::sched_setaffinity(
+                0, // Defaults to current thread
+                size_of::<libc::cpu_set_t>(),
+                &set as *const _,
+            )
+        } != 0
+        {
+            panic!("Could not set CPU affinity")
+        }
+    }
+}
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
index ad2a67160f4..bed65440dc9 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
@@ -125,81 +125,64 @@ pub struct NewPermission {
     /// Whether a read access should be performed on the non-frozen
     /// part on a retag.
     nonfreeze_access: bool,
+    /// Permission for memory outside the range.
+    outside_perm: Permission,
     /// Whether this pointer is part of the arguments of a function call.
     /// `protector` is `Some(_)` for all pointers marked `noalias`.
     protector: Option<ProtectorKind>,
 }
 
 impl<'tcx> NewPermission {
-    /// Determine NewPermission of the reference from the type of the pointee.
-    fn from_ref_ty(
+    /// Determine NewPermission of the reference/Box from the type of the pointee.
+    ///
+    /// A `ref_mutability` of `None` indicates a `Box` type.
+    fn new(
         pointee: Ty<'tcx>,
-        mutability: Mutability,
-        kind: RetagKind,
+        ref_mutability: Option<Mutability>,
+        retag_kind: RetagKind,
         cx: &crate::MiriInterpCx<'tcx>,
     ) -> Option<Self> {
         let ty_is_unpin = pointee.is_unpin(*cx.tcx, cx.typing_env());
-        let is_protected = kind == RetagKind::FnEntry;
-        let protector = is_protected.then_some(ProtectorKind::StrongProtector);
-
-        Some(match mutability {
-            Mutability::Mut if ty_is_unpin =>
-                NewPermission {
-                    freeze_perm: Permission::new_reserved(
-                        /* ty_is_freeze */ true,
-                        is_protected,
-                    ),
-                    freeze_access: true,
-                    nonfreeze_perm: Permission::new_reserved(
-                        /* ty_is_freeze */ false,
-                        is_protected,
-                    ),
-                    // If we have a mutable reference, then the non-frozen part will
-                    // have state `ReservedIM` or `Reserved`, which can have an initial read access
-                    // performed on it because you cannot have multiple mutable borrows.
-                    nonfreeze_access: true,
-                    protector,
-                },
-            Mutability::Not =>
-                NewPermission {
-                    freeze_perm: Permission::new_frozen(),
-                    freeze_access: true,
-                    nonfreeze_perm: Permission::new_cell(),
-                    // If it is a shared reference, then the non-frozen
-                    // part will have state `Cell`, which should not have an initial access,
-                    // as this can cause data races when using thread-safe data types like
-                    // `Mutex<T>`.
-                    nonfreeze_access: false,
-                    protector,
-                },
-            _ => return None,
-        })
-    }
+        let ty_is_freeze = pointee.is_freeze(*cx.tcx, cx.typing_env());
+        let is_protected = retag_kind == RetagKind::FnEntry;
 
-    /// Compute permission for `Box`-like type (`Box` always, and also `Unique` if enabled).
-    /// These pointers allow deallocation so need a different kind of protector not handled
-    /// by `from_ref_ty`.
-    fn from_unique_ty(
-        ty: Ty<'tcx>,
-        kind: RetagKind,
-        cx: &crate::MiriInterpCx<'tcx>,
-    ) -> Option<Self> {
-        let pointee = ty.builtin_deref(true).unwrap();
-        pointee.is_unpin(*cx.tcx, cx.typing_env()).then_some(()).map(|()| {
-            // Regular `Unpin` box, give it `noalias` but only a weak protector
-            // because it is valid to deallocate it within the function.
-            let is_protected = kind == RetagKind::FnEntry;
-            let protector = is_protected.then_some(ProtectorKind::WeakProtector);
-            NewPermission {
-                freeze_perm: Permission::new_reserved(/* ty_is_freeze */ true, is_protected),
-                freeze_access: true,
-                nonfreeze_perm: Permission::new_reserved(
-                    /* ty_is_freeze */ false,
-                    is_protected,
-                ),
-                nonfreeze_access: true,
-                protector,
-            }
+        if matches!(ref_mutability, Some(Mutability::Mut) | None if !ty_is_unpin) {
+            // Mutable reference / Box to pinning type: retagging is a NOP.
+            // FIXME: with `UnsafePinned`, this should do proper per-byte tracking.
+            return None;
+        }
+
+        let freeze_perm = match ref_mutability {
+            // Shared references are frozen.
+            Some(Mutability::Not) => Permission::new_frozen(),
+            // Mutable references and Boxes are reserved.
+            _ => Permission::new_reserved_frz(),
+        };
+        let nonfreeze_perm = match ref_mutability {
+            // Shared references are "transparent".
+            Some(Mutability::Not) => Permission::new_cell(),
+            // *Protected* mutable references and boxes are reserved without regard for interior mutability.
+            _ if is_protected => Permission::new_reserved_frz(),
+            // Unprotected mutable references and boxes start in `ReservedIM`.
+            _ => Permission::new_reserved_im(),
+        };
+
+        // Everything except for `Cell` gets an initial access.
+        let initial_access = |perm: &Permission| !perm.is_cell();
+
+        Some(NewPermission {
+            freeze_perm,
+            freeze_access: initial_access(&freeze_perm),
+            nonfreeze_perm,
+            nonfreeze_access: initial_access(&nonfreeze_perm),
+            outside_perm: if ty_is_freeze { freeze_perm } else { nonfreeze_perm },
+            protector: is_protected.then_some(if ref_mutability.is_some() {
+                // Strong protector for references
+                ProtectorKind::StrongProtector
+            } else {
+                // Weak protector for boxes
+                ProtectorKind::WeakProtector
+            }),
         })
     }
 }
@@ -313,30 +296,20 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
         let span = this.machine.current_span();
 
-        // Store initial permissions and their corresponding range.
-        let mut perms_map: DedupRangeMap<LocationState> = DedupRangeMap::new(
-            ptr_size,
-            LocationState::new_accessed(Permission::new_disabled(), IdempotentForeignAccess::None), // this will be overwritten
-        );
-        // Keep track of whether the node has any part that allows for interior mutability.
-        // FIXME: This misses `PhantomData<UnsafeCell<T>>` which could be considered a marker
-        // for requesting interior mutability.
-        let mut has_unsafe_cell = false;
-
         // When adding a new node, the SIFA of its parents needs to be updated, potentially across
         // the entire memory range. For the parts that are being accessed below, the access itself
-        // trivially takes care of that. However, we have to do some more work to also deal with
-        // the parts that are not being accessed. Specifically what we do is that we
-        // call `update_last_accessed_after_retag` on the SIFA of the permission set for the part of
-        // memory outside `perm_map` -- so that part is definitely taken care of. The remaining concern
-        // is the part of memory that is in the range of `perms_map`, but not accessed below.
-        // There we have two cases:
-        // * If we do have an `UnsafeCell` (`has_unsafe_cell` becomes true), then the non-accessed part
-        //   uses `nonfreeze_perm`, so the `nonfreeze_perm` initialized parts are also fine. We enforce
-        //   the `freeze_perm` parts to be accessed, and thus everything is taken care of.
-        // * If there is no `UnsafeCell`, then `freeze_perm` is used everywhere (both inside and outside the initial range),
-        //   and we update everything to have the `freeze_perm`'s SIFA, so there are no issues. (And this assert below is not
-        //   actually needed in this case).
+        // trivially takes care of that. However, we have to do some more work to also deal with the
+        // parts that are not being accessed. Specifically what we do is that we call
+        // `update_last_accessed_after_retag` on the SIFA of the permission set for the part of
+        // memory outside `perms_map` -- so that part is definitely taken care of. The remaining
+        // concern is the part of memory that is in the range of `perms_map`, but not accessed
+        // below. There we have two cases:
+        // * If the type is `!Freeze`, then the non-accessed part uses `nonfreeze_perm`, so the
+        //   `nonfreeze_perm` initialized parts are also fine. We enforce the `freeze_perm` parts to
+        //   be accessed via the assert below, and thus everything is taken care of.
+        // * If the type is `Freeze`, then `freeze_perm` is used everywhere (both inside and outside
+        //   the initial range), and we update everything to have the `freeze_perm`'s SIFA, so there
+        //   are no issues. (And this assert below is not actually needed in this case).
         assert!(new_perm.freeze_access);
 
         let protected = new_perm.protector.is_some();
@@ -350,66 +323,48 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             .get_tree_borrows_params()
             .precise_interior_mut;
 
-        let default_perm = if !precise_interior_mut {
-            // NOTE: Using `ty_is_freeze` doesn't give the same result as going through the range
-            // and computing `has_unsafe_cell`.  This is because of zero-sized `UnsafeCell`, for which
-            // `has_unsafe_cell` is false, but `!ty_is_freeze` is true.
-            let ty_is_freeze = place.layout.ty.is_freeze(*this.tcx, this.typing_env());
-            let (perm, access) = if ty_is_freeze {
+        // Compute initial "inside" permissions.
+        let loc_state = |frozen: bool| -> LocationState {
+            let (perm, access) = if frozen {
                 (new_perm.freeze_perm, new_perm.freeze_access)
             } else {
                 (new_perm.nonfreeze_perm, new_perm.nonfreeze_access)
             };
             let sifa = perm.strongest_idempotent_foreign_access(protected);
-            let new_loc = if access {
+            if access {
                 LocationState::new_accessed(perm, sifa)
             } else {
                 LocationState::new_non_accessed(perm, sifa)
-            };
-
-            for (_loc_range, loc) in perms_map.iter_mut_all() {
-                *loc = new_loc;
             }
-
-            perm
+        };
+        let perms_map = if !precise_interior_mut {
+            // For `!Freeze` types, just pretend the entire thing is an `UnsafeCell`.
+            let ty_is_freeze = place.layout.ty.is_freeze(*this.tcx, this.typing_env());
+            let state = loc_state(ty_is_freeze);
+            DedupRangeMap::new(ptr_size, state)
         } else {
+            // The initial state will be overwritten by the visitor below.
+            let mut perms_map: DedupRangeMap<LocationState> = DedupRangeMap::new(
+                ptr_size,
+                LocationState::new_accessed(
+                    Permission::new_disabled(),
+                    IdempotentForeignAccess::None,
+                ),
+            );
             this.visit_freeze_sensitive(place, ptr_size, |range, frozen| {
-                has_unsafe_cell = has_unsafe_cell || !frozen;
-
-                // We are only ever `Frozen` inside the frozen bits.
-                let (perm, access) = if frozen {
-                    (new_perm.freeze_perm, new_perm.freeze_access)
-                } else {
-                    (new_perm.nonfreeze_perm, new_perm.nonfreeze_access)
-                };
-                let sifa = perm.strongest_idempotent_foreign_access(protected);
-                // NOTE: Currently, `access` is false if and only if `perm` is Cell, so this `if`
-                // doesn't not change whether any code is UB or not. We could just always use
-                // `new_accessed` and everything would stay the same. But that seems conceptually
-                // odd, so we keep the initial "accessed" bit of the `LocationState` in sync with whether
-                // a read access is performed below.
-                let new_loc = if access {
-                    LocationState::new_accessed(perm, sifa)
-                } else {
-                    LocationState::new_non_accessed(perm, sifa)
-                };
-
-                // Store initial permissions.
+                let state = loc_state(frozen);
                 for (_loc_range, loc) in perms_map.iter_mut(range.start, range.size) {
-                    *loc = new_loc;
+                    *loc = state;
                 }
-
                 interp_ok(())
             })?;
-
-            // Allow lazily writing to surrounding data if we found an `UnsafeCell`.
-            if has_unsafe_cell { new_perm.nonfreeze_perm } else { new_perm.freeze_perm }
+            perms_map
         };
 
         let alloc_extra = this.get_alloc_extra(alloc_id)?;
         let mut tree_borrows = alloc_extra.borrow_tracker_tb().borrow_mut();
 
-        for (perm_range, perm) in perms_map.iter_mut_all() {
+        for (perm_range, perm) in perms_map.iter_all() {
             if perm.is_accessed() {
                 // Some reborrows incur a read access to the parent.
                 // Adjust range to be relative to allocation start (rather than to `place`).
@@ -447,7 +402,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             orig_tag,
             new_tag,
             perms_map,
-            default_perm,
+            new_perm.outside_perm,
             protected,
             span,
         )?;
@@ -514,7 +469,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let this = self.eval_context_mut();
         let new_perm = match val.layout.ty.kind() {
             &ty::Ref(_, pointee, mutability) =>
-                NewPermission::from_ref_ty(pointee, mutability, kind, this),
+                NewPermission::new(pointee, Some(mutability), kind, this),
             _ => None,
         };
         if let Some(new_perm) = new_perm {
@@ -571,8 +526,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             fn visit_box(&mut self, box_ty: Ty<'tcx>, place: &PlaceTy<'tcx>) -> InterpResult<'tcx> {
                 // Only boxes for the global allocator get any special treatment.
                 if box_ty.is_box_global(*self.ecx.tcx) {
+                    let pointee = place.layout.ty.builtin_deref(true).unwrap();
                     let new_perm =
-                        NewPermission::from_unique_ty(place.layout.ty, self.kind, self.ecx);
+                        NewPermission::new(pointee, /* not a ref */ None, self.kind, self.ecx);
                     self.retag_ptr_inplace(place, new_perm)?;
                 }
                 interp_ok(())
@@ -591,7 +547,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 match place.layout.ty.kind() {
                     &ty::Ref(_, pointee, mutability) => {
                         let new_perm =
-                            NewPermission::from_ref_ty(pointee, mutability, self.kind, self.ecx);
+                            NewPermission::new(pointee, Some(mutability), self.kind, self.ecx);
                         self.retag_ptr_inplace(place, new_perm)?;
                     }
                     ty::RawPtr(_, _) => {
@@ -643,14 +599,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             // never be ReservedIM, the value of the `ty_is_freeze`
             // argument doesn't matter
             // (`ty_is_freeze || true` in `new_reserved` will always be `true`).
-            freeze_perm: Permission::new_reserved(
-                /* ty_is_freeze */ true, /* protected */ true,
-            ),
+            freeze_perm: Permission::new_reserved_frz(),
             freeze_access: true,
-            nonfreeze_perm: Permission::new_reserved(
-                /* ty_is_freeze */ false, /* protected */ true,
-            ),
+            nonfreeze_perm: Permission::new_reserved_frz(),
             nonfreeze_access: true,
+            outside_perm: Permission::new_reserved_frz(),
             protector: Some(ProtectorKind::StrongProtector),
         };
         this.tb_retag_place(place, new_perm)
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs
index 38863ca0734..390435e58d1 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/perms.rs
@@ -272,28 +272,15 @@ impl Permission {
 
     /// Default initial permission of a reborrowed mutable reference that is either
     /// protected or not interior mutable.
-    fn new_reserved_frz() -> Self {
+    pub fn new_reserved_frz() -> Self {
         Self { inner: ReservedFrz { conflicted: false } }
     }
 
     /// Default initial permission of an unprotected interior mutable reference.
-    fn new_reserved_im() -> Self {
+    pub fn new_reserved_im() -> Self {
         Self { inner: ReservedIM }
     }
 
-    /// Wrapper around `new_reserved_frz` and `new_reserved_im` that decides
-    /// which to call based on the interior mutability and the retag kind (whether there
-    /// is a protector is relevant because being protected takes priority over being
-    /// interior mutable)
-    pub fn new_reserved(ty_is_freeze: bool, protected: bool) -> Self {
-        // As demonstrated by `tests/fail/tree_borrows/reservedim_spurious_write.rs`,
-        // interior mutability and protectors interact poorly.
-        // To eliminate the case of Protected Reserved IM we override interior mutability
-        // in the case of a protected reference: protected references are always considered
-        // "freeze" in their reservation phase.
-        if ty_is_freeze || protected { Self::new_reserved_frz() } else { Self::new_reserved_im() }
-    }
-
     /// Default initial permission of a reborrowed shared reference.
     pub fn new_frozen() -> Self {
         Self { inner: Frozen }
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs
index bb3fc2d80b3..d9b3696e4f8 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/tree/tests.rs
@@ -610,7 +610,7 @@ mod spurious_read {
             },
             y: LocStateProt {
                 state: LocationState::new_non_accessed(
-                    Permission::new_reserved(/* freeze */ true, /* protected */ true),
+                    Permission::new_reserved_frz(),
                     IdempotentForeignAccess::default(),
                 ),
                 prot: true,
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 0b2ce900414..0136de55216 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -1077,7 +1077,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
                 .target_features
                 .iter()
                 .filter(|&feature| {
-                    feature.kind != TargetFeatureKind::Implied && !ecx.tcx.sess.target_features.contains(&feature.name)
+                    feature.kind != TargetFeatureKind::Implied
+                        && !ecx.tcx.sess.target_features.contains(&feature.name)
                 })
                 .fold(String::new(), |mut s, feature| {
                     if !s.is_empty() {
diff --git a/src/tools/miri/src/shims/unix/fd.rs b/src/tools/miri/src/shims/unix/fd.rs
index e226a55d8b1..9fbecffc55d 100644
--- a/src/tools/miri/src/shims/unix/fd.rs
+++ b/src/tools/miri/src/shims/unix/fd.rs
@@ -264,11 +264,19 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             return this.set_last_error_and_return(LibcError("EBADF"), dest);
         };
 
+        // Handle the zero-sized case. The man page says:
+        // > If count is zero, read() may detect the errors described below.  In the absence of any
+        // > errors, or if read() does not check for errors, a read() with a count of 0 returns zero
+        // > and has no other effects.
+        if count == 0 {
+            this.write_null(dest)?;
+            return interp_ok(());
+        }
         // Non-deterministically decide to further reduce the count, simulating a partial read (but
-        // never to 0, that has different behavior).
+        // never to 0, that would indicate EOF).
         let count =
             if fd.nondet_short_accesses() && count >= 2 && this.machine.rng.get_mut().random() {
-                count / 2
+                count / 2 // since `count` is at least 2, the result is still at least 1
             } else {
                 count
             };
@@ -338,8 +346,20 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             return this.set_last_error_and_return(LibcError("EBADF"), dest);
         };
 
-        // Non-deterministically decide to further reduce the count, simulating a partial write (but
-        // never to 0, that has different behavior).
+        // Handle the zero-sized case. The man page says:
+        // > If count is zero and fd refers to a regular file, then write() may return a failure
+        // > status if one of the errors below is detected.  If no errors are detected, or error
+        // > detection is not performed, 0 is returned without causing any other effect.  If count
+        // > is zero and fd refers to a file other than a regular file, the results are not
+        // > specified.
+        if count == 0 {
+            // For now let's not open the can of worms of what exactly "not specified" could mean...
+            this.write_null(dest)?;
+            return interp_ok(());
+        }
+        // Non-deterministically decide to further reduce the count, simulating a partial write.
+        // We avoid reducing the write size to 0: the docs seem to be entirely fine with that,
+        // but the standard library is not (https://github.com/rust-lang/rust/issues/145959).
         let count =
             if fd.nondet_short_accesses() && count >= 2 && this.machine.rng.get_mut().random() {
                 count / 2
diff --git a/src/tools/miri/src/shims/windows/handle.rs b/src/tools/miri/src/shims/windows/handle.rs
index 8a965ea316d..92d6321bed1 100644
--- a/src/tools/miri/src/shims/windows/handle.rs
+++ b/src/tools/miri/src/shims/windows/handle.rs
@@ -289,9 +289,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         }
 
         if this.ptr_is_null(target_handle_ptr)? {
-            throw_unsup_format!(
-                "`DuplicateHandle` `lpTargetHandle` parameter is null, which is unsupported"
-            );
+            throw_machine_stop!(TerminationInfo::Abort(
+                "`DuplicateHandle` `lpTargetHandle` parameter must not be null, as otherwise the \
+                newly created handle is leaked"
+                    .to_string()
+            ));
         }
 
         if options != this.eval_windows("c", "DUPLICATE_SAME_ACCESS") {
diff --git a/src/tools/miri/tests/deps/Cargo.lock b/src/tools/miri/tests/deps/Cargo.lock
index 4b783ebdc4e..65ca4215c60 100644
--- a/src/tools/miri/tests/deps/Cargo.lock
+++ b/src/tools/miri/tests/deps/Cargo.lock
@@ -296,9 +296,9 @@ dependencies = [
 
 [[package]]
 name = "slab"
-version = "0.4.10"
+version = "0.4.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d"
+checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
 
 [[package]]
 name = "socket2"
diff --git a/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs b/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs
new file mode 100644
index 00000000000..7d51050f32b
--- /dev/null
+++ b/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs
@@ -0,0 +1,9 @@
+//@compile-flags: -Zmiri-tree-borrows
+
+fn main() {
+    // Since the "inside" part is `Freeze`, the permission to mutate is gone.
+    let pair = ((), 1);
+    let x = &pair.0;
+    let ptr = (&raw const *x).cast::<i32>().cast_mut();
+    unsafe { ptr.write(0) }; //~ERROR: /write access .* forbidden/
+}
diff --git a/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.stderr b/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.stderr
new file mode 100644
index 00000000000..e9800468c57
--- /dev/null
+++ b/src/tools/miri/tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.stderr
@@ -0,0 +1,21 @@
+error: Undefined Behavior: write access through <TAG> at ALLOC[0x0] is forbidden
+  --> tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs:LL:CC
+   |
+LL |     unsafe { ptr.write(0) };
+   |              ^^^^^^^^^^^^ Undefined Behavior occurred here
+   |
+   = help: this indicates a potential bug in the program: it performed an invalid operation, but the Tree Borrows rules it violated are still experimental
+   = help: see https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/tree-borrows.md for further information
+   = help: the accessed tag <TAG> has state Frozen which forbids this child write access
+help: the accessed tag <TAG> was created here, in the initial state Frozen
+  --> tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs:LL:CC
+   |
+LL |     let x = &pair.0;
+   |             ^^^^^^^
+   = note: BACKTRACE (of the first span):
+   = note: inside `main` at tests/fail/tree_borrows/frozen-lazy-write-to-surrounding.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/pass-dep/shims/windows-fs.rs b/src/tools/miri/tests/pass-dep/shims/windows-fs.rs
index 4ca19046b67..7b756603d92 100644
--- a/src/tools/miri/tests/pass-dep/shims/windows-fs.rs
+++ b/src/tools/miri/tests/pass-dep/shims/windows-fs.rs
@@ -2,20 +2,20 @@
 //@compile-flags: -Zmiri-disable-isolation
 #![allow(nonstandard_style)]
 
-use std::io::{ErrorKind, Read, Write};
+use std::io::{ErrorKind, Read, Seek, SeekFrom, Write};
 use std::os::windows::ffi::OsStrExt;
-use std::os::windows::io::AsRawHandle;
+use std::os::windows::io::{AsRawHandle, FromRawHandle};
 use std::path::Path;
-use std::{fs, ptr};
+use std::{fs, mem, ptr};
 
 #[path = "../../utils/mod.rs"]
 mod utils;
 
 use windows_sys::Wdk::Storage::FileSystem::{NtReadFile, NtWriteFile};
 use windows_sys::Win32::Foundation::{
-    CloseHandle, ERROR_ACCESS_DENIED, ERROR_ALREADY_EXISTS, ERROR_IO_DEVICE, GENERIC_READ,
-    GENERIC_WRITE, GetLastError, RtlNtStatusToDosError, STATUS_ACCESS_DENIED,
-    STATUS_IO_DEVICE_ERROR, STATUS_SUCCESS, SetLastError,
+    CloseHandle, DUPLICATE_SAME_ACCESS, DuplicateHandle, ERROR_ACCESS_DENIED, ERROR_ALREADY_EXISTS,
+    ERROR_IO_DEVICE, FALSE, GENERIC_READ, GENERIC_WRITE, GetLastError, RtlNtStatusToDosError,
+    STATUS_ACCESS_DENIED, STATUS_IO_DEVICE_ERROR, STATUS_SUCCESS, SetLastError,
 };
 use windows_sys::Win32::Storage::FileSystem::{
     BY_HANDLE_FILE_INFORMATION, CREATE_ALWAYS, CREATE_NEW, CreateFileW, DeleteFileW,
@@ -24,6 +24,7 @@ use windows_sys::Win32::Storage::FileSystem::{
     FILE_SHARE_WRITE, GetFileInformationByHandle, OPEN_ALWAYS, OPEN_EXISTING, SetFilePointerEx,
 };
 use windows_sys::Win32::System::IO::IO_STATUS_BLOCK;
+use windows_sys::Win32::System::Threading::GetCurrentProcess;
 
 fn main() {
     unsafe {
@@ -36,6 +37,7 @@ fn main() {
         test_ntstatus_to_dos();
         test_file_read_write();
         test_file_seek();
+        test_dup_handle();
     }
 }
 
@@ -273,6 +275,39 @@ unsafe fn test_file_read_write() {
     assert_eq!(GetLastError(), 1234);
 }
 
+unsafe fn test_dup_handle() {
+    let temp = utils::tmp().join("test_dup.txt");
+
+    let mut file1 = fs::File::options().read(true).write(true).create(true).open(&temp).unwrap();
+
+    file1.write_all(b"Hello, World!\n").unwrap();
+    file1.seek(SeekFrom::Start(0)).unwrap();
+
+    let first_handle = file1.as_raw_handle();
+
+    let cur_proc = GetCurrentProcess();
+    let mut second_handle = mem::zeroed();
+    let res = DuplicateHandle(
+        cur_proc,
+        first_handle,
+        cur_proc,
+        &mut second_handle,
+        0,
+        FALSE,
+        DUPLICATE_SAME_ACCESS,
+    );
+    assert!(res != 0);
+
+    let mut buf1 = [0; 5];
+    file1.read(&mut buf1).unwrap();
+    assert_eq!(&buf1, b"Hello");
+
+    let mut file2 = fs::File::from_raw_handle(second_handle);
+    let mut buf2 = [0; 5];
+    file2.read(&mut buf2).unwrap();
+    assert_eq!(&buf2, b", Wor");
+}
+
 unsafe fn test_file_seek() {
     let temp = utils::tmp().join("test_file_seek.txt");
     let mut file = fs::File::options().create(true).write(true).read(true).open(&temp).unwrap();
diff --git a/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs b/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs
index 6a625e597df..82976326a8d 100644
--- a/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs
+++ b/src/tools/miri/tests/pass/both_borrows/basic_aliasing_model.rs
@@ -23,7 +23,8 @@ fn main() {
     not_unpin_not_protected();
     write_does_not_invalidate_all_aliases();
     box_into_raw_allows_interior_mutable_alias();
-    cell_inside_struct()
+    cell_inside_struct();
+    zst();
 }
 
 // Make sure that reading from an `&mut` does, like reborrowing to `&`,
@@ -287,3 +288,22 @@ fn cell_inside_struct() {
     // Writing to `field1`, which is reserved, should also be allowed.
     (*a).field1 = 88;
 }
+
+/// ZST reborrows on various kinds of dangling pointers are valid.
+fn zst() {
+    unsafe {
+        // Integer pointer.
+        let ptr = ptr::without_provenance_mut::<()>(15);
+        let _ref = &mut *ptr;
+
+        // Out-of-bounds pointer.
+        let mut b = Box::new(0u8);
+        let ptr = (&raw mut *b).wrapping_add(15) as *mut ();
+        let _ref = &mut *ptr;
+
+        // Deallocated pointer.
+        let ptr = &raw mut *b as *mut ();
+        drop(b);
+        let _ref = &mut *ptr;
+    }
+}
diff --git a/src/tools/miri/tests/pass/shims/fs.rs b/src/tools/miri/tests/pass/shims/fs.rs
index e7f11c54704..022dcc5dcba 100644
--- a/src/tools/miri/tests/pass/shims/fs.rs
+++ b/src/tools/miri/tests/pass/shims/fs.rs
@@ -72,7 +72,9 @@ fn test_file() {
 
     // Writing to a file opened for reading should error (and not stop interpretation). std does not
     // categorize the error so we don't check for details.
-    file.write(&[]).unwrap_err();
+    file.write(&[0]).unwrap_err();
+    // However, writing 0 bytes can succeed or fail.
+    let _ignore = file.write(&[]);
 
     // Removing file should succeed.
     remove_file(&path).unwrap();
diff --git a/src/tools/miri/tests/pass/tree_borrows/cell-lazy-write-to-surrounding.rs b/src/tools/miri/tests/pass/tree_borrows/cell-lazy-write-to-surrounding.rs
index abe08f2cd22..7352784ac7a 100644
--- a/src/tools/miri/tests/pass/tree_borrows/cell-lazy-write-to-surrounding.rs
+++ b/src/tools/miri/tests/pass/tree_borrows/cell-lazy-write-to-surrounding.rs
@@ -14,9 +14,11 @@ fn main() {
     foo(&arr[0]);
 
     let pair = (Cell::new(1), 1);
-    // TODO: Ideally, this would result in UB since the second element
-    // in `pair` is Frozen.  We would need some way to express a
-    // "shared reference with permission to access surrounding
-    // interior mutable data".
     foo(&pair.0);
+
+    // As long as the "inside" part is `!Freeze`, the permission to mutate the "outside" is preserved.
+    let pair = (Cell::new(()), 1);
+    let x = &pair.0;
+    let ptr = (&raw const *x).cast::<i32>().cast_mut();
+    unsafe { ptr.write(0) };
 }
diff --git a/src/tools/miri/tests/utils/libc.rs b/src/tools/miri/tests/utils/libc.rs
index 1a3cd067c04..4757a5a268c 100644
--- a/src/tools/miri/tests/utils/libc.rs
+++ b/src/tools/miri/tests/utils/libc.rs
@@ -34,10 +34,7 @@ pub unsafe fn write_all(
         if res < 0 {
             return res;
         }
-        if res == 0 {
-            // EOF?
-            break;
-        }
+        // Apparently a return value of 0 is just a short write, nothing special (unlike reads).
         written_so_far += res as libc::size_t;
     }
     return written_so_far as libc::ssize_t;
diff --git a/tests/codegen-llvm/async-fn-debug-awaitee-field.rs b/tests/codegen-llvm/async-fn-debug-awaitee-field.rs
index 50860c90662..b9d3d9ee66e 100644
--- a/tests/codegen-llvm/async-fn-debug-awaitee-field.rs
+++ b/tests/codegen-llvm/async-fn-debug-awaitee-field.rs
@@ -18,11 +18,11 @@ pub async fn async_fn_test() {
 
 pub async fn foo() {}
 
+// NONMSVC: [[AWAITEE_TYPE:![0-9]*]] = !DICompositeType(tag: DW_TAG_structure_type, name: "{async_fn_env#0}", scope: [[AWAITEE_SCOPE:![0-9]*]],
+// MSVC: [[AWAITEE_TYPE:![0-9]*]] = !DICompositeType(tag: DW_TAG_union_type, name: "enum2$<async_fn_debug_awaitee_field::foo::async_fn_env$0>",
+// NONMSVC: [[AWAITEE_SCOPE]] = !DINamespace(name: "foo",
 // NONMSVC: [[GEN:!.*]] = !DICompositeType(tag: DW_TAG_structure_type, name: "{async_fn_env#0}", scope: [[GEN_SCOPE:![0-9]*]],
 // MSVC: [[GEN:!.*]] = !DICompositeType(tag: DW_TAG_union_type, name: "enum2$<async_fn_debug_awaitee_field::async_fn_test::async_fn_env$0>",
 // NONMSVC: [[GEN_SCOPE:!.*]] = !DINamespace(name: "async_fn_test",
 // CHECK: [[SUSPEND_STRUCT:!.*]] = !DICompositeType(tag: DW_TAG_structure_type, name: "Suspend0", scope: [[GEN]],
-// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "__awaitee", scope: [[SUSPEND_STRUCT]], {{.*}}, baseType: [[AWAITEE_TYPE:![0-9]*]],
-// NONMSVC: [[AWAITEE_TYPE]] = !DICompositeType(tag: DW_TAG_structure_type, name: "{async_fn_env#0}", scope: [[AWAITEE_SCOPE:![0-9]*]],
-// MSVC: [[AWAITEE_TYPE]] = !DICompositeType(tag: DW_TAG_union_type, name: "enum2$<async_fn_debug_awaitee_field::foo::async_fn_env$0>",
-// NONMSVC: [[AWAITEE_SCOPE]] = !DINamespace(name: "foo",
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "__awaitee", scope: [[SUSPEND_STRUCT]], {{.*}}, baseType: [[AWAITEE_TYPE]],
diff --git a/tests/codegen-units/item-collection/async-fn-impl.rs b/tests/codegen-units/item-collection/async-fn-impl.rs
new file mode 100644
index 00000000000..540fa0b3228
--- /dev/null
+++ b/tests/codegen-units/item-collection/async-fn-impl.rs
@@ -0,0 +1,10 @@
+//@ edition: 2024
+// When pub async fn is monomorphized, its implementation coroutine is also monomorphized
+//@ compile-flags: --crate-type=lib
+
+//~ MONO_ITEM fn async_fn @@
+//~ MONO_ITEM fn async_fn::{closure#0} @@
+#[unsafe(no_mangle)]
+pub async fn async_fn(x: u64) -> bool {
+    true
+}
diff --git a/tests/codegen-units/item-collection/opaque-return-impls.rs b/tests/codegen-units/item-collection/opaque-return-impls.rs
new file mode 100644
index 00000000000..7d5f4f5b669
--- /dev/null
+++ b/tests/codegen-units/item-collection/opaque-return-impls.rs
@@ -0,0 +1,89 @@
+//@ only-x86_64-unknown-linux-gnu
+//@ compile-flags: -C panic=abort -Zinline-mir=no -Copt-level=0 -Zcross-crate-inline-threshold=never -Zmir-opt-level=0 -Cno-prepopulate-passes
+//@ no-prefer-dynamic
+//@ edition:2024
+#![crate_type = "lib"]
+
+trait TestTrait {
+    fn test_func(&self);
+}
+
+struct TestStruct {}
+
+impl TestTrait for TestStruct {
+    fn test_func(&self) {
+        println!("TestStruct::test_func");
+    }
+}
+
+#[inline(never)]
+pub fn foo() -> impl TestTrait {
+    TestStruct {}
+}
+
+//~ MONO_ITEM fn foo
+//~ MONO_ITEM fn <TestStruct as TestTrait>::test_func
+
+trait TestTrait2 {
+    fn test_func2(&self);
+}
+
+struct TestStruct2 {}
+
+impl TestTrait2 for TestStruct2 {
+    fn test_func2(&self) {
+        println!("TestStruct2::test_func2");
+    }
+}
+
+#[inline(never)]
+pub fn foo2() -> Box<dyn TestTrait2> {
+    Box::new(TestStruct2 {})
+}
+
+//~ MONO_ITEM fn <TestStruct2 as TestTrait2>::test_func2
+//~ MONO_ITEM fn alloc::alloc::exchange_malloc
+//~ MONO_ITEM fn foo2
+//~ MONO_ITEM fn std::alloc::Global::alloc_impl
+//~ MONO_ITEM fn std::boxed::Box::<TestStruct2>::new
+//~ MONO_ITEM fn std::alloc::Layout::from_size_align_unchecked::precondition_check
+//~ MONO_ITEM fn std::ptr::NonNull::<T>::new_unchecked::precondition_check
+
+struct Counter {
+    count: usize,
+}
+
+impl Counter {
+    fn new() -> Counter {
+        Counter { count: 0 }
+    }
+}
+
+impl Iterator for Counter {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.count += 1;
+        if self.count < 6 { Some(self.count) } else { None }
+    }
+}
+
+#[inline(never)]
+pub fn foo3() -> Box<dyn Iterator<Item = usize>> {
+    Box::new(Counter::new())
+}
+
+//~ MONO_ITEM fn <Counter as std::iter::Iterator::advance_by::SpecAdvanceBy>::spec_advance_by
+//~ MONO_ITEM fn <Counter as std::iter::Iterator::advance_by::SpecAdvanceBy>::spec_advance_by::{closure#0}
+//~ MONO_ITEM fn <Counter as std::iter::Iterator>::advance_by
+//~ MONO_ITEM fn <Counter as std::iter::Iterator>::next
+//~ MONO_ITEM fn <Counter as std::iter::Iterator>::nth
+//~ MONO_ITEM fn <Counter as std::iter::Iterator>::size_hint
+//~ MONO_ITEM fn <Counter as std::iter::Iterator>::try_fold::<std::num::NonZero<usize>, {closure@<Counter as std::iter::Iterator::advance_by::SpecAdvanceBy>::spec_advance_by::{closure#0}}, std::option::Option<std::num::NonZero<usize>>>
+//~ MONO_ITEM fn <std::option::Option<std::num::NonZero<usize>> as std::ops::FromResidual<std::option::Option<std::convert::Infallible>>>::from_residual
+//~ MONO_ITEM fn <std::option::Option<std::num::NonZero<usize>> as std::ops::Try>::branch
+//~ MONO_ITEM fn <std::option::Option<std::num::NonZero<usize>> as std::ops::Try>::from_output
+//~ MONO_ITEM fn foo3
+//~ MONO_ITEM fn std::boxed::Box::<Counter>::new
+//~ MONO_ITEM fn Counter::new
+//~ MONO_ITEM fn core::fmt::rt::<impl std::fmt::Arguments<'_>>::new_const::<1>
diff --git a/tests/coverage/async.cov-map b/tests/coverage/async.cov-map
index c528ad525b5..d75c4b5981a 100644
--- a/tests/coverage/async.cov-map
+++ b/tests/coverage/async.cov-map
@@ -103,21 +103,21 @@ Number of file 0 mappings: 3
 Highest counter ID seen: (none)
 
 Function name: async::g
-Raw bytes (9): 0x[01, 01, 00, 01, 01, 1b, 01, 00, 16]
+Raw bytes (9): 0x[01, 01, 00, 01, 01, 1b, 01, 00, 12]
 Number of files: 1
 - file 0 => $DIR/async.rs
 Number of expressions: 0
 Number of file 0 mappings: 1
-- Code(Counter(0)) at (prev + 27, 1) to (start + 0, 22)
+- Code(Counter(0)) at (prev + 27, 1) to (start + 0, 18)
 Highest counter ID seen: c0
 
 Function name: async::g::{closure#0} (unused)
-Raw bytes (64): 0x[01, 01, 00, 0c, 00, 1b, 17, 00, 18, 00, 01, 0b, 00, 0c, 00, 01, 09, 00, 0a, 00, 00, 0e, 00, 17, 00, 00, 1b, 00, 1c, 00, 00, 20, 00, 22, 00, 01, 09, 00, 0a, 00, 00, 0e, 00, 17, 00, 00, 1b, 00, 1c, 00, 00, 20, 00, 22, 00, 01, 0e, 00, 10, 00, 02, 01, 00, 02]
+Raw bytes (64): 0x[01, 01, 00, 0c, 00, 1b, 13, 00, 14, 00, 01, 0b, 00, 0c, 00, 01, 09, 00, 0a, 00, 00, 0e, 00, 17, 00, 00, 1b, 00, 1c, 00, 00, 20, 00, 22, 00, 01, 09, 00, 0a, 00, 00, 0e, 00, 17, 00, 00, 1b, 00, 1c, 00, 00, 20, 00, 22, 00, 01, 0e, 00, 10, 00, 02, 01, 00, 02]
 Number of files: 1
 - file 0 => $DIR/async.rs
 Number of expressions: 0
 Number of file 0 mappings: 12
-- Code(Zero) at (prev + 27, 23) to (start + 0, 24)
+- Code(Zero) at (prev + 27, 19) to (start + 0, 20)
 - Code(Zero) at (prev + 1, 11) to (start + 0, 12)
 - Code(Zero) at (prev + 1, 9) to (start + 0, 10)
 - Code(Zero) at (prev + 0, 14) to (start + 0, 23)
diff --git a/tests/coverage/async.coverage b/tests/coverage/async.coverage
index 9fca1b6997d..9409e6b1deb 100644
--- a/tests/coverage/async.coverage
+++ b/tests/coverage/async.coverage
@@ -24,8 +24,8 @@
    LL|       |
    LL|      0|async fn foo() -> [bool; 10] { [false; 10] } // unused function; executor does not block on `h()`
    LL|       |
-   LL|      1|pub async fn g(x: u8) {
-                                    ^0
+   LL|      1|async fn g(x: u8) {
+                                ^0
    LL|      0|    match x {
    LL|      0|        y if e().await == y => (),
    LL|      0|        y if f().await == y => (),
diff --git a/tests/coverage/async.rs b/tests/coverage/async.rs
index da0a1c0b6f0..777ad7ce7c0 100644
--- a/tests/coverage/async.rs
+++ b/tests/coverage/async.rs
@@ -24,7 +24,7 @@ async fn f() -> u8 { 1 }
 
 async fn foo() -> [bool; 10] { [false; 10] } // unused function; executor does not block on `h()`
 
-pub async fn g(x: u8) {
+async fn g(x: u8) {
     match x {
         y if e().await == y => (),
         y if f().await == y => (),
diff --git a/tests/ui/async-await/future-sizes/async-awaiting-fut.rs b/tests/ui/async-await/future-sizes/async-awaiting-fut.rs
index a3f0bdc8514..b5f59069f85 100644
--- a/tests/ui/async-await/future-sizes/async-awaiting-fut.rs
+++ b/tests/ui/async-await/future-sizes/async-awaiting-fut.rs
@@ -1,7 +1,9 @@
-//@ compile-flags: -Z print-type-sizes --crate-type lib
+//@ compile-flags: -C panic=abort -Z print-type-sizes --crate-type lib
+//@ needs-deterministic-layouts
 //@ edition:2021
 //@ build-pass
 //@ ignore-pass
+//@ only-x86_64
 
 async fn wait() {}
 
diff --git a/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout b/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
index 642e27b2a57..b30c15bcbe6 100644
--- a/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
+++ b/tests/ui/async-await/future-sizes/async-awaiting-fut.stdout
@@ -48,6 +48,39 @@ print-type-size     variant `Returned`: 1024 bytes
 print-type-size         upvar `.arg`: 1024 bytes
 print-type-size     variant `Panicked`: 1024 bytes
 print-type-size         upvar `.arg`: 1024 bytes
+print-type-size type: `std::task::Context<'_>`: 32 bytes, alignment: 8 bytes
+print-type-size     field `.waker`: 8 bytes
+print-type-size     field `.local_waker`: 8 bytes
+print-type-size     field `.ext`: 16 bytes
+print-type-size     field `._marker`: 0 bytes
+print-type-size     field `._marker2`: 0 bytes
+print-type-size type: `std::panic::Location<'_>`: 24 bytes, alignment: 8 bytes
+print-type-size     field `.filename`: 16 bytes
+print-type-size     field `.line`: 4 bytes
+print-type-size     field `.col`: 4 bytes
+print-type-size     field `._filename`: 0 bytes
+print-type-size type: `core::task::wake::ExtData<'_>`: 16 bytes, alignment: 8 bytes
+print-type-size     variant `Some`: 16 bytes
+print-type-size         field `.0`: 16 bytes
+print-type-size     variant `None`: 0 bytes
+print-type-size         field `.0`: 0 bytes
+print-type-size type: `std::panic::AssertUnwindSafe<core::task::wake::ExtData<'_>>`: 16 bytes, alignment: 8 bytes
+print-type-size     field `.0`: 16 bytes
+print-type-size type: `std::ptr::NonNull<str>`: 16 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 16 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of big_fut()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of calls_fut<{async fn body of big_fut()}>()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of test()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of wait()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::ptr::DynMetadata<dyn std::any::Any>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `._vtable_ptr`: 8 bytes
+print-type-size     field `._phantom`: 0 bytes
+print-type-size type: `std::ptr::NonNull<std::ptr::metadata::VTable>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
 print-type-size type: `std::mem::ManuallyDrop<bool>`: 1 bytes, alignment: 1 bytes
 print-type-size     field `.value`: 1 bytes
 print-type-size type: `std::mem::ManuallyDrop<{async fn body of wait()}>`: 1 bytes, alignment: 1 bytes
@@ -70,3 +103,7 @@ print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 0 bytes
 print-type-size     variant `Returned`: 0 bytes
 print-type-size     variant `Panicked`: 0 bytes
+print-type-size type: `std::marker::PhantomData<&str>`: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::marker::PhantomData<*mut ()>`: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::marker::PhantomData<dyn std::any::Any>`: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::marker::PhantomData<fn(&()) -> &()>`: 0 bytes, alignment: 1 bytes
diff --git a/tests/ui/async-await/future-sizes/large-arg.rs b/tests/ui/async-await/future-sizes/large-arg.rs
index 5fbae22a771..809f7cf1f93 100644
--- a/tests/ui/async-await/future-sizes/large-arg.rs
+++ b/tests/ui/async-await/future-sizes/large-arg.rs
@@ -1,7 +1,9 @@
-//@ compile-flags: -Z print-type-sizes --crate-type=lib
+//@ compile-flags: -C panic=abort -Z print-type-sizes --crate-type=lib
+//@ needs-deterministic-layouts
 //@ edition: 2021
 //@ build-pass
 //@ ignore-pass
+//@ only-x86_64
 
 pub async fn test() {
     let _ = a([0u8; 1024]).await;
diff --git a/tests/ui/async-await/future-sizes/large-arg.stdout b/tests/ui/async-await/future-sizes/large-arg.stdout
index 67168a3d6ef..e00420d1493 100644
--- a/tests/ui/async-await/future-sizes/large-arg.stdout
+++ b/tests/ui/async-await/future-sizes/large-arg.stdout
@@ -58,3 +58,45 @@ print-type-size     variant `Returned`: 1024 bytes
 print-type-size         upvar `.t`: 1024 bytes
 print-type-size     variant `Panicked`: 1024 bytes
 print-type-size         upvar `.t`: 1024 bytes
+print-type-size type: `std::task::Context<'_>`: 32 bytes, alignment: 8 bytes
+print-type-size     field `.waker`: 8 bytes
+print-type-size     field `.local_waker`: 8 bytes
+print-type-size     field `.ext`: 16 bytes
+print-type-size     field `._marker`: 0 bytes
+print-type-size     field `._marker2`: 0 bytes
+print-type-size type: `std::panic::Location<'_>`: 24 bytes, alignment: 8 bytes
+print-type-size     field `.filename`: 16 bytes
+print-type-size     field `.line`: 4 bytes
+print-type-size     field `.col`: 4 bytes
+print-type-size     field `._filename`: 0 bytes
+print-type-size type: `core::task::wake::ExtData<'_>`: 16 bytes, alignment: 8 bytes
+print-type-size     variant `Some`: 16 bytes
+print-type-size         field `.0`: 16 bytes
+print-type-size     variant `None`: 0 bytes
+print-type-size         field `.0`: 0 bytes
+print-type-size type: `std::panic::AssertUnwindSafe<core::task::wake::ExtData<'_>>`: 16 bytes, alignment: 8 bytes
+print-type-size     field `.0`: 16 bytes
+print-type-size type: `std::ptr::NonNull<str>`: 16 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 16 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of a<[u8; 1024]>()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of b<[u8; 1024]>()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of c<[u8; 1024]>()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of test()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::ptr::DynMetadata<dyn std::any::Any>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `._vtable_ptr`: 8 bytes
+print-type-size     field `._phantom`: 0 bytes
+print-type-size type: `std::ptr::NonNull<std::ptr::metadata::VTable>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::task::Poll<()>`: 1 bytes, alignment: 1 bytes
+print-type-size     discriminant: 1 bytes
+print-type-size     variant `Ready`: 0 bytes
+print-type-size         field `.0`: 0 bytes
+print-type-size     variant `Pending`: 0 bytes
+print-type-size type: `std::marker::PhantomData<&str>`: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::marker::PhantomData<*mut ()>`: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::marker::PhantomData<dyn std::any::Any>`: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::marker::PhantomData<fn(&()) -> &()>`: 0 bytes, alignment: 1 bytes
diff --git a/tests/ui/impl-trait/non-defining-uses/as-projection-term.next.stderr b/tests/ui/impl-trait/non-defining-uses/as-projection-term.next.stderr
deleted file mode 100644
index 96e242d5d48..00000000000
--- a/tests/ui/impl-trait/non-defining-uses/as-projection-term.next.stderr
+++ /dev/null
@@ -1,8 +0,0 @@
-error: non-defining use of `impl Sized + '_` in the defining scope
-  --> $DIR/as-projection-term.rs:14:19
-   |
-LL |     prove_proj(|| recur());
-   |                   ^^^^^^^
-
-error: aborting due to 1 previous error
-
diff --git a/tests/ui/impl-trait/non-defining-uses/as-projection-term.rs b/tests/ui/impl-trait/non-defining-uses/as-projection-term.rs
index f0cf333b6a1..19f983bab70 100644
--- a/tests/ui/impl-trait/non-defining-uses/as-projection-term.rs
+++ b/tests/ui/impl-trait/non-defining-uses/as-projection-term.rs
@@ -1,7 +1,7 @@
 //@ revisions: current next
 //@[next] compile-flags: -Znext-solver
 //@ ignore-compare-mode-next-solver (explicit revisions)
-//@[current] check-pass
+//@ check-pass
 
 fn prove_proj<R>(_: impl FnOnce() -> R) {}
 fn recur<'a>() -> impl Sized + 'a {
@@ -12,6 +12,6 @@ fn recur<'a>() -> impl Sized + 'a {
     // inference variable at this point, we unify it with `opaque<'1>` and
     // end up ignoring that defining use as the hidden type is equal to its key.
     prove_proj(|| recur());
-    //[next]~^ ERROR non-defining use of `impl Sized + '_` in the defining scope
 }
+
 fn main() {}
diff --git a/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.current.stderr b/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.current.stderr
new file mode 100644
index 00000000000..30424ec58f9
--- /dev/null
+++ b/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.current.stderr
@@ -0,0 +1,17 @@
+error[E0792]: expected generic type parameter, found `impl Foo`
+  --> $DIR/double-wrap-with-defining-use.rs:12:26
+   |
+LL | fn a<T: Foo>(x: T) -> impl Foo {
+   |      - this generic parameter must be used with a generic type parameter
+LL |     if true { x } else { a(a(x)) }
+   |                          ^^^^^^^
+
+error: type parameter `T` is part of concrete type but not used in parameter list for the `impl Trait` type alias
+  --> $DIR/double-wrap-with-defining-use.rs:12:26
+   |
+LL |     if true { x } else { a(a(x)) }
+   |                          ^^^^^^^
+
+error: aborting due to 2 previous errors
+
+For more information about this error, try `rustc --explain E0792`.
diff --git a/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.rs b/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.rs
index 339277fec37..734b1920772 100644
--- a/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.rs
+++ b/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.rs
@@ -1,12 +1,17 @@
 // Regression test for ICE from issue #140545
 // The error message is confusing and wrong, but that's a different problem (#139350)
+
 //@ edition:2018
+//@ revisions: current next
+//@[next] compile-flags: -Znext-solver
+//@ ignore-compare-mode-next-solver (explicit revisions)
+//@[next] check-pass
 
 trait Foo {}
-fn a(x: impl Foo) -> impl Foo {
+fn a<T: Foo>(x: T) -> impl Foo {
     if true { x } else { a(a(x)) }
-    //~^ ERROR: expected generic type parameter, found `impl Foo` [E0792]
-    //~| ERROR: type parameter `impl Foo` is part of concrete type but not used in parameter list for the `impl Trait` type alias
+    //[current]~^ ERROR: expected generic type parameter, found `impl Foo` [E0792]
+    //[current]~| ERROR: type parameter `T` is part of concrete type but not used in parameter list for the `impl Trait` type alias
 }
 
 fn main(){}
diff --git a/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.stderr b/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.stderr
deleted file mode 100644
index 1b02811e31b..00000000000
--- a/tests/ui/impl-trait/non-defining-uses/double-wrap-with-defining-use.stderr
+++ /dev/null
@@ -1,17 +0,0 @@
-error[E0792]: expected generic type parameter, found `impl Foo`
-  --> $DIR/double-wrap-with-defining-use.rs:7:26
-   |
-LL | fn a(x: impl Foo) -> impl Foo {
-   |         -------- this generic parameter must be used with a generic type parameter
-LL |     if true { x } else { a(a(x)) }
-   |                          ^^^^^^^
-
-error: type parameter `impl Foo` is part of concrete type but not used in parameter list for the `impl Trait` type alias
-  --> $DIR/double-wrap-with-defining-use.rs:7:26
-   |
-LL |     if true { x } else { a(a(x)) }
-   |                          ^^^^^^^
-
-error: aborting due to 2 previous errors
-
-For more information about this error, try `rustc --explain E0792`.
diff --git a/tests/ui/impl-trait/non-defining-uses/recursive-call.rs b/tests/ui/impl-trait/non-defining-uses/recursive-call.rs
new file mode 100644
index 00000000000..ecddf2cec47
--- /dev/null
+++ b/tests/ui/impl-trait/non-defining-uses/recursive-call.rs
@@ -0,0 +1,30 @@
+//@ revisions: current next
+//@[next] compile-flags: -Znext-solver
+//@ ignore-compare-mode-next-solver (explicit revisions)
+//@ check-pass
+
+// Regression test for the non-defining use error in `gll`.
+
+struct Foo;
+impl Foo {
+    fn recur(&self, b: bool) -> impl Sized + '_ {
+        if b {
+            let temp = Foo;
+            temp.recur(false);
+            // desugars to `Foo::recur(&temp, false);`
+        }
+
+        self
+    }
+
+    fn in_closure(&self) -> impl Sized + '_ {
+        let _ = || {
+            let temp = Foo;
+            temp.in_closure();
+            // desugars to `Foo::in_closure(&temp);`
+        };
+
+        self
+    }
+}
+fn main() {}
diff --git a/tests/ui/print_type_sizes/async.rs b/tests/ui/print_type_sizes/async.rs
index 805bccbcf63..951e7cd1012 100644
--- a/tests/ui/print_type_sizes/async.rs
+++ b/tests/ui/print_type_sizes/async.rs
@@ -1,7 +1,9 @@
-//@ compile-flags: -Z print-type-sizes --crate-type lib
+//@ compile-flags: -C panic=abort -Z print-type-sizes --crate-type lib
+//@ needs-deterministic-layouts
 //@ edition:2021
 //@ build-pass
 //@ ignore-pass
+//@ only-x86_64
 
 #![allow(dropping_copy_types)]
 
diff --git a/tests/ui/print_type_sizes/async.stdout b/tests/ui/print_type_sizes/async.stdout
index 83a6962e4cd..d3d6b6471c6 100644
--- a/tests/ui/print_type_sizes/async.stdout
+++ b/tests/ui/print_type_sizes/async.stdout
@@ -16,6 +16,35 @@ print-type-size type: `std::mem::MaybeUninit<[u8; 8192]>`: 8192 bytes, alignment
 print-type-size     variant `MaybeUninit`: 8192 bytes
 print-type-size         field `.uninit`: 0 bytes
 print-type-size         field `.value`: 8192 bytes
+print-type-size type: `std::task::Context<'_>`: 32 bytes, alignment: 8 bytes
+print-type-size     field `.waker`: 8 bytes
+print-type-size     field `.local_waker`: 8 bytes
+print-type-size     field `.ext`: 16 bytes
+print-type-size     field `._marker`: 0 bytes
+print-type-size     field `._marker2`: 0 bytes
+print-type-size type: `std::panic::Location<'_>`: 24 bytes, alignment: 8 bytes
+print-type-size     field `.filename`: 16 bytes
+print-type-size     field `.line`: 4 bytes
+print-type-size     field `.col`: 4 bytes
+print-type-size     field `._filename`: 0 bytes
+print-type-size type: `core::task::wake::ExtData<'_>`: 16 bytes, alignment: 8 bytes
+print-type-size     variant `Some`: 16 bytes
+print-type-size         field `.0`: 16 bytes
+print-type-size     variant `None`: 0 bytes
+print-type-size         field `.0`: 0 bytes
+print-type-size type: `std::panic::AssertUnwindSafe<core::task::wake::ExtData<'_>>`: 16 bytes, alignment: 8 bytes
+print-type-size     field `.0`: 16 bytes
+print-type-size type: `std::ptr::NonNull<str>`: 16 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 16 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of test()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::pin::Pin<&mut {async fn body of wait()}>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
+print-type-size type: `std::ptr::DynMetadata<dyn std::any::Any>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `._vtable_ptr`: 8 bytes
+print-type-size     field `._phantom`: 0 bytes
+print-type-size type: `std::ptr::NonNull<std::ptr::metadata::VTable>`: 8 bytes, alignment: 8 bytes
+print-type-size     field `.pointer`: 8 bytes
 print-type-size type: `std::mem::ManuallyDrop<{async fn body of wait()}>`: 1 bytes, alignment: 1 bytes
 print-type-size     field `.value`: 1 bytes
 print-type-size type: `std::mem::MaybeUninit<{async fn body of wait()}>`: 1 bytes, alignment: 1 bytes
@@ -32,3 +61,7 @@ print-type-size     discriminant: 1 bytes
 print-type-size     variant `Unresumed`: 0 bytes
 print-type-size     variant `Returned`: 0 bytes
 print-type-size     variant `Panicked`: 0 bytes
+print-type-size type: `std::marker::PhantomData<&str>`: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::marker::PhantomData<*mut ()>`: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::marker::PhantomData<dyn std::any::Any>`: 0 bytes, alignment: 1 bytes
+print-type-size type: `std::marker::PhantomData<fn(&()) -> &()>`: 0 bytes, alignment: 1 bytes