about summary refs log tree commit diff
path: root/compiler/rustc_mir_transform/src/inline.rs
diff options
context:
space:
mode:
authorDavid Wood <david.wood2@arm.com>2024-09-23 18:46:23 +0100
committerDavid Wood <david.wood2@arm.com>2025-01-10 18:37:54 +0000
commitf86169a58f212b174a01aa721545df009e96bfda (patch)
tree8dcb0c44ab5c144297435b7f5cf279f1af979d7a /compiler/rustc_mir_transform/src/inline.rs
parent336209eef13882bd1e211b24779584cb7ef911eb (diff)
downloadrust-f86169a58f212b174a01aa721545df009e96bfda.tar.gz
rust-f86169a58f212b174a01aa721545df009e96bfda.zip
mir_transform: implement forced inlining
Adds `#[rustc_force_inline]` which is similar to always inlining but
reports an error if the inlining was not possible, and which always
attempts to inline annotated items, regardless of optimisation levels.
It can only be applied to free functions to guarantee that the MIR
inliner will be able to resolve calls.
Diffstat (limited to 'compiler/rustc_mir_transform/src/inline.rs')
-rw-r--r--compiler/rustc_mir_transform/src/inline.rs1498
1 files changed, 871 insertions, 627 deletions
diff --git a/compiler/rustc_mir_transform/src/inline.rs b/compiler/rustc_mir_transform/src/inline.rs
index 339acbad6b9..a5a4b5987f5 100644
--- a/compiler/rustc_mir_transform/src/inline.rs
+++ b/compiler/rustc_mir_transform/src/inline.rs
@@ -6,7 +6,7 @@ use std::ops::{Range, RangeFrom};
 use rustc_abi::{ExternAbi, FieldIdx};
 use rustc_attr_parsing::InlineAttr;
 use rustc_hir::def::DefKind;
-use rustc_hir::def_id::DefId;
+use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_index::Idx;
 use rustc_index::bit_set::BitSet;
 use rustc_middle::bug;
@@ -29,10 +29,6 @@ pub(crate) mod cycle;
 
 const TOP_DOWN_DEPTH_LIMIT: usize = 5;
 
-// Made public so that `mir_drops_elaborated_and_const_checked` can be overridden
-// by custom rustc drivers, running all the steps by themselves. See #114628.
-pub struct Inline;
-
 #[derive(Clone, Debug)]
 struct CallSite<'tcx> {
     callee: Instance<'tcx>,
@@ -41,14 +37,12 @@ struct CallSite<'tcx> {
     source_info: SourceInfo,
 }
 
+// Made public so that `mir_drops_elaborated_and_const_checked` can be overridden
+// by custom rustc drivers, running all the steps by themselves. See #114628.
+pub struct Inline;
+
 impl<'tcx> crate::MirPass<'tcx> for Inline {
     fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
-        // FIXME(#127234): Coverage instrumentation currently doesn't handle inlined
-        // MIR correctly when Modified Condition/Decision Coverage is enabled.
-        if sess.instrument_coverage_mcdc() {
-            return false;
-        }
-
         if let Some(enabled) = sess.opts.unstable_opts.inline_mir {
             return enabled;
         }
@@ -67,7 +61,7 @@ impl<'tcx> crate::MirPass<'tcx> for Inline {
     fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
         let span = trace_span!("inline", body = %tcx.def_path_str(body.source.def_id()));
         let _guard = span.enter();
-        if inline(tcx, body) {
+        if inline::<NormalInliner<'tcx>>(tcx, body) {
             debug!("running simplify cfg on {:?}", body.source);
             simplify_cfg(body);
             deref_finder(tcx, body);
@@ -75,47 +69,84 @@ impl<'tcx> crate::MirPass<'tcx> for Inline {
     }
 }
 
-fn inline<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool {
-    let def_id = body.source.def_id().expect_local();
+pub struct ForceInline;
 
-    // Only do inlining into fn bodies.
-    if !tcx.hir().body_owner_kind(def_id).is_fn_or_closure() {
-        return false;
+impl ForceInline {
+    pub fn should_run_pass_for_callee<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> bool {
+        matches!(tcx.codegen_fn_attrs(def_id).inline, InlineAttr::Force { .. })
     }
-    if body.source.promoted.is_some() {
-        return false;
+}
+
+impl<'tcx> crate::MirPass<'tcx> for ForceInline {
+    fn is_enabled(&self, _: &rustc_session::Session) -> bool {
+        true
     }
-    // Avoid inlining into coroutines, since their `optimized_mir` is used for layout computation,
-    // which can create a cycle, even when no attempt is made to inline the function in the other
-    // direction.
-    if body.coroutine.is_some() {
-        return false;
+
+    fn can_be_overridden(&self) -> bool {
+        false
     }
 
-    let typing_env = body.typing_env(tcx);
-    let codegen_fn_attrs = tcx.codegen_fn_attrs(def_id);
+    fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
+        let span = trace_span!("force_inline", body = %tcx.def_path_str(body.source.def_id()));
+        let _guard = span.enter();
+        if inline::<ForceInliner<'tcx>>(tcx, body) {
+            debug!("running simplify cfg on {:?}", body.source);
+            simplify_cfg(body);
+            deref_finder(tcx, body);
+        }
+    }
+}
 
-    let mut this = Inliner {
-        tcx,
-        typing_env,
-        codegen_fn_attrs,
-        history: Vec::new(),
-        changed: false,
-        caller_is_inline_forwarder: matches!(
-            codegen_fn_attrs.inline,
-            InlineAttr::Hint | InlineAttr::Always
-        ) && body_is_forwarder(body),
-    };
-    let blocks = START_BLOCK..body.basic_blocks.next_index();
-    this.process_blocks(body, blocks);
-    this.changed
+trait Inliner<'tcx> {
+    fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Self;
+
+    fn tcx(&self) -> TyCtxt<'tcx>;
+    fn typing_env(&self) -> ty::TypingEnv<'tcx>;
+    fn history(&self) -> &[DefId];
+    fn caller_def_id(&self) -> LocalDefId;
+
+    /// Has the caller body been changed?
+    fn changed(self) -> bool;
+
+    /// Should inlining happen for a given callee?
+    fn should_inline_for_callee(&self, def_id: DefId) -> bool;
+
+    fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool;
+
+    /// Returns inlining decision that is based on the examination of callee MIR body.
+    /// Assumes that codegen attributes have been checked for compatibility already.
+    fn check_callee_mir_body(
+        &self,
+        callsite: &CallSite<'tcx>,
+        callee_body: &Body<'tcx>,
+        callee_attrs: &CodegenFnAttrs,
+        cross_crate_inlinable: bool,
+    ) -> Result<(), &'static str>;
+
+    // How many callsites in a body are we allowed to inline? We need to limit this in order
+    // to prevent super-linear growth in MIR size.
+    fn inline_limit_for_block(&self) -> Option<usize>;
+
+    /// Called when inlining succeeds.
+    fn on_inline_success(
+        &mut self,
+        callsite: &CallSite<'tcx>,
+        caller_body: &mut Body<'tcx>,
+        new_blocks: std::ops::Range<BasicBlock>,
+    );
+
+    /// Called when inlining failed or was not performed.
+    fn on_inline_failure(&self, callsite: &CallSite<'tcx>, reason: &'static str);
+
+    /// Called when the inline limit for a body is reached.
+    fn on_inline_limit_reached(&self) -> bool;
 }
 
-struct Inliner<'tcx> {
+struct ForceInliner<'tcx> {
     tcx: TyCtxt<'tcx>,
     typing_env: ty::TypingEnv<'tcx>,
-    /// Caller codegen attributes.
-    codegen_fn_attrs: &'tcx CodegenFnAttrs,
+    /// `DefId` of caller.
+    def_id: LocalDefId,
     /// Stack of inlined instances.
     /// We only check the `DefId` and not the args because we want to
     /// avoid inlining cases of polymorphic recursion.
@@ -124,366 +155,190 @@ struct Inliner<'tcx> {
     history: Vec<DefId>,
     /// Indicates that the caller body has been modified.
     changed: bool,
-    /// Indicates that the caller is #[inline] and just calls another function,
-    /// and thus we can inline less into it as it'll be inlined itself.
-    caller_is_inline_forwarder: bool,
 }
 
-impl<'tcx> Inliner<'tcx> {
-    fn process_blocks(&mut self, caller_body: &mut Body<'tcx>, blocks: Range<BasicBlock>) {
-        // How many callsites in this body are we allowed to inline? We need to limit this in order
-        // to prevent super-linear growth in MIR size
-        let inline_limit = match self.history.len() {
-            0 => usize::MAX,
-            1..=TOP_DOWN_DEPTH_LIMIT => 1,
-            _ => return,
-        };
-        let mut inlined_count = 0;
-        for bb in blocks {
-            let bb_data = &caller_body[bb];
-            if bb_data.is_cleanup {
-                continue;
-            }
-
-            let Some(callsite) = self.resolve_callsite(caller_body, bb, bb_data) else {
-                continue;
-            };
-
-            let span = trace_span!("process_blocks", %callsite.callee, ?bb);
-            let _guard = span.enter();
-
-            match self.try_inlining(caller_body, &callsite) {
-                Err(reason) => {
-                    debug!("not-inlined {} [{}]", callsite.callee, reason);
-                }
-                Ok(new_blocks) => {
-                    debug!("inlined {}", callsite.callee);
-                    self.changed = true;
-
-                    self.history.push(callsite.callee.def_id());
-                    self.process_blocks(caller_body, new_blocks);
-                    self.history.pop();
-
-                    inlined_count += 1;
-                    if inlined_count == inline_limit {
-                        debug!("inline count reached");
-                        return;
-                    }
-                }
-            }
-        }
+impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> {
+    fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Self {
+        Self { tcx, typing_env: body.typing_env(tcx), def_id, history: Vec::new(), changed: false }
     }
 
-    /// Attempts to inline a callsite into the caller body. When successful returns basic blocks
-    /// containing the inlined body. Otherwise returns an error describing why inlining didn't take
-    /// place.
-    fn try_inlining(
-        &self,
-        caller_body: &mut Body<'tcx>,
-        callsite: &CallSite<'tcx>,
-    ) -> Result<std::ops::Range<BasicBlock>, &'static str> {
-        self.check_mir_is_available(caller_body, callsite.callee)?;
-
-        let callee_attrs = self.tcx.codegen_fn_attrs(callsite.callee.def_id());
-        let cross_crate_inlinable = self.tcx.cross_crate_inlinable(callsite.callee.def_id());
-        self.check_codegen_attributes(callsite, callee_attrs, cross_crate_inlinable)?;
-
-        // Intrinsic fallback bodies are automatically made cross-crate inlineable,
-        // but at this stage we don't know whether codegen knows the intrinsic,
-        // so just conservatively don't inline it. This also ensures that we do not
-        // accidentally inline the body of an intrinsic that *must* be overridden.
-        if self.tcx.has_attr(callsite.callee.def_id(), sym::rustc_intrinsic) {
-            return Err("Callee is an intrinsic, do not inline fallback bodies");
-        }
-
-        let terminator = caller_body[callsite.block].terminator.as_ref().unwrap();
-        let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() };
-        let destination_ty = destination.ty(&caller_body.local_decls, self.tcx).ty;
-        for arg in args {
-            if !arg.node.ty(&caller_body.local_decls, self.tcx).is_sized(self.tcx, self.typing_env)
-            {
-                // We do not allow inlining functions with unsized params. Inlining these functions
-                // could create unsized locals, which are unsound and being phased out.
-                return Err("Call has unsized argument");
-            }
-        }
-
-        let callee_body = try_instance_mir(self.tcx, callsite.callee.def)?;
-        self.check_mir_body(callsite, callee_body, callee_attrs, cross_crate_inlinable)?;
-
-        let Ok(callee_body) = callsite.callee.try_instantiate_mir_and_normalize_erasing_regions(
-            self.tcx,
-            self.typing_env,
-            ty::EarlyBinder::bind(callee_body.clone()),
-        ) else {
-            return Err("failed to normalize callee body");
-        };
-
-        // Normally, this shouldn't be required, but trait normalization failure can create a
-        // validation ICE.
-        if !validate_types(self.tcx, self.typing_env, &callee_body, &caller_body).is_empty() {
-            return Err("failed to validate callee body");
-        }
-
-        // Check call signature compatibility.
-        // Normally, this shouldn't be required, but trait normalization failure can create a
-        // validation ICE.
-        let output_type = callee_body.return_ty();
-        if !util::sub_types(self.tcx, self.typing_env, output_type, destination_ty) {
-            trace!(?output_type, ?destination_ty);
-            return Err("failed to normalize return type");
-        }
-        if callsite.fn_sig.abi() == ExternAbi::RustCall {
-            // FIXME: Don't inline user-written `extern "rust-call"` functions,
-            // since this is generally perf-negative on rustc, and we hope that
-            // LLVM will inline these functions instead.
-            if callee_body.spread_arg.is_some() {
-                return Err("do not inline user-written rust-call functions");
-            }
+    fn tcx(&self) -> TyCtxt<'tcx> {
+        self.tcx
+    }
 
-            let (self_arg, arg_tuple) = match &args[..] {
-                [arg_tuple] => (None, arg_tuple),
-                [self_arg, arg_tuple] => (Some(self_arg), arg_tuple),
-                _ => bug!("Expected `rust-call` to have 1 or 2 args"),
-            };
+    fn typing_env(&self) -> ty::TypingEnv<'tcx> {
+        self.typing_env
+    }
 
-            let self_arg_ty =
-                self_arg.map(|self_arg| self_arg.node.ty(&caller_body.local_decls, self.tcx));
+    fn history(&self) -> &[DefId] {
+        &self.history
+    }
 
-            let arg_tuple_ty = arg_tuple.node.ty(&caller_body.local_decls, self.tcx);
-            let ty::Tuple(arg_tuple_tys) = *arg_tuple_ty.kind() else {
-                bug!("Closure arguments are not passed as a tuple");
-            };
+    fn caller_def_id(&self) -> LocalDefId {
+        self.def_id
+    }
 
-            for (arg_ty, input) in
-                self_arg_ty.into_iter().chain(arg_tuple_tys).zip(callee_body.args_iter())
-            {
-                let input_type = callee_body.local_decls[input].ty;
-                if !util::sub_types(self.tcx, self.typing_env, input_type, arg_ty) {
-                    trace!(?arg_ty, ?input_type);
-                    return Err("failed to normalize tuple argument type");
-                }
-            }
-        } else {
-            for (arg, input) in args.iter().zip(callee_body.args_iter()) {
-                let input_type = callee_body.local_decls[input].ty;
-                let arg_ty = arg.node.ty(&caller_body.local_decls, self.tcx);
-                if !util::sub_types(self.tcx, self.typing_env, input_type, arg_ty) {
-                    trace!(?arg_ty, ?input_type);
-                    return Err("failed to normalize argument type");
-                }
-            }
-        }
+    fn changed(self) -> bool {
+        self.changed
+    }
 
-        let old_blocks = caller_body.basic_blocks.next_index();
-        self.inline_call(caller_body, callsite, callee_body);
-        let new_blocks = old_blocks..caller_body.basic_blocks.next_index();
+    fn should_inline_for_callee(&self, def_id: DefId) -> bool {
+        ForceInline::should_run_pass_for_callee(self.tcx(), def_id)
+    }
 
-        Ok(new_blocks)
+    fn check_caller_mir_body(&self, _: &Body<'tcx>) -> bool {
+        true
     }
 
-    fn check_mir_is_available(
+    #[instrument(level = "debug", skip(self, callee_body))]
+    fn check_callee_mir_body(
         &self,
-        caller_body: &Body<'tcx>,
-        callee: Instance<'tcx>,
+        _: &CallSite<'tcx>,
+        callee_body: &Body<'tcx>,
+        _: &CodegenFnAttrs,
+        _: bool,
     ) -> Result<(), &'static str> {
-        let caller_def_id = caller_body.source.def_id();
-        let callee_def_id = callee.def_id();
-        if callee_def_id == caller_def_id {
-            return Err("self-recursion");
-        }
-
-        match callee.def {
-            InstanceKind::Item(_) => {
-                // If there is no MIR available (either because it was not in metadata or
-                // because it has no MIR because it's an extern function), then the inliner
-                // won't cause cycles on this.
-                if !self.tcx.is_mir_available(callee_def_id) {
-                    return Err("item MIR unavailable");
-                }
-            }
-            // These have no own callable MIR.
-            InstanceKind::Intrinsic(_) | InstanceKind::Virtual(..) => {
-                return Err("instance without MIR (intrinsic / virtual)");
-            }
-
-            // FIXME(#127030): `ConstParamHasTy` has bad interactions with
-            // the drop shim builder, which does not evaluate predicates in
-            // the correct param-env for types being dropped. Stall resolving
-            // the MIR for this instance until all of its const params are
-            // substituted.
-            InstanceKind::DropGlue(_, Some(ty)) if ty.has_type_flags(TypeFlags::HAS_CT_PARAM) => {
-                return Err("still needs substitution");
-            }
-
-            // This cannot result in an immediate cycle since the callee MIR is a shim, which does
-            // not get any optimizations run on it. Any subsequent inlining may cause cycles, but we
-            // do not need to catch this here, we can wait until the inliner decides to continue
-            // inlining a second time.
-            InstanceKind::VTableShim(_)
-            | InstanceKind::ReifyShim(..)
-            | InstanceKind::FnPtrShim(..)
-            | InstanceKind::ClosureOnceShim { .. }
-            | InstanceKind::ConstructCoroutineInClosureShim { .. }
-            | InstanceKind::DropGlue(..)
-            | InstanceKind::CloneShim(..)
-            | InstanceKind::ThreadLocalShim(..)
-            | InstanceKind::FnPtrAddrShim(..)
-            | InstanceKind::AsyncDropGlueCtorShim(..) => return Ok(()),
-        }
+        if let Some(_) = callee_body.tainted_by_errors { Err("body has errors") } else { Ok(()) }
+    }
 
-        if self.tcx.is_constructor(callee_def_id) {
-            trace!("constructors always have MIR");
-            // Constructor functions cannot cause a query cycle.
-            return Ok(());
-        }
+    fn inline_limit_for_block(&self) -> Option<usize> {
+        Some(usize::MAX)
+    }
 
-        if callee_def_id.is_local() {
-            // If we know for sure that the function we're calling will itself try to
-            // call us, then we avoid inlining that function.
-            if self.tcx.mir_callgraph_reachable((callee, caller_def_id.expect_local())) {
-                return Err("caller might be reachable from callee (query cycle avoidance)");
-            }
+    fn on_inline_success(
+        &mut self,
+        callsite: &CallSite<'tcx>,
+        caller_body: &mut Body<'tcx>,
+        new_blocks: std::ops::Range<BasicBlock>,
+    ) {
+        self.changed = true;
 
-            Ok(())
-        } else {
-            // This cannot result in an immediate cycle since the callee MIR is from another crate
-            // and is already optimized. Any subsequent inlining may cause cycles, but we do
-            // not need to catch this here, we can wait until the inliner decides to continue
-            // inlining a second time.
-            trace!("functions from other crates always have MIR");
-            Ok(())
-        }
+        self.history.push(callsite.callee.def_id());
+        process_blocks(self, caller_body, new_blocks);
+        self.history.pop();
     }
 
-    fn resolve_callsite(
-        &self,
-        caller_body: &Body<'tcx>,
-        bb: BasicBlock,
-        bb_data: &BasicBlockData<'tcx>,
-    ) -> Option<CallSite<'tcx>> {
-        // Only consider direct calls to functions
-        let terminator = bb_data.terminator();
-
-        // FIXME(explicit_tail_calls): figure out if we can inline tail calls
-        if let TerminatorKind::Call { ref func, fn_span, .. } = terminator.kind {
-            let func_ty = func.ty(caller_body, self.tcx);
-            if let ty::FnDef(def_id, args) = *func_ty.kind() {
-                // To resolve an instance its args have to be fully normalized.
-                let args = self.tcx.try_normalize_erasing_regions(self.typing_env, args).ok()?;
-                let callee = Instance::try_resolve(self.tcx, self.typing_env, def_id, args)
-                    .ok()
-                    .flatten()?;
-
-                if let InstanceKind::Virtual(..) | InstanceKind::Intrinsic(_) = callee.def {
-                    return None;
-                }
-
-                if self.history.contains(&callee.def_id()) {
-                    return None;
-                }
+    fn on_inline_failure(&self, callsite: &CallSite<'tcx>, reason: &'static str) {
+        let tcx = self.tcx();
+        let InlineAttr::Force { attr_span, reason: justification } =
+            tcx.codegen_fn_attrs(callsite.callee.def_id()).inline
+        else {
+            bug!("called on item without required inlining");
+        };
 
-                let fn_sig = self.tcx.fn_sig(def_id).instantiate(self.tcx, args);
+        let call_span = callsite.source_info.span;
+        tcx.dcx().emit_err(crate::errors::ForceInlineFailure {
+            call_span,
+            attr_span,
+            caller_span: tcx.def_span(self.def_id),
+            caller: tcx.def_path_str(self.def_id),
+            callee_span: tcx.def_span(callsite.callee.def_id()),
+            callee: tcx.def_path_str(callsite.callee.def_id()),
+            reason,
+            justification: justification.map(|sym| crate::errors::ForceInlineJustification { sym }),
+        });
+    }
 
-                // Additionally, check that the body that we're inlining actually agrees
-                // with the ABI of the trait that the item comes from.
-                if let InstanceKind::Item(instance_def_id) = callee.def
-                    && self.tcx.def_kind(instance_def_id) == DefKind::AssocFn
-                    && let instance_fn_sig = self.tcx.fn_sig(instance_def_id).skip_binder()
-                    && instance_fn_sig.abi() != fn_sig.abi()
-                {
-                    return None;
-                }
+    fn on_inline_limit_reached(&self) -> bool {
+        false
+    }
+}
 
-                let source_info = SourceInfo { span: fn_span, ..terminator.source_info };
+struct NormalInliner<'tcx> {
+    tcx: TyCtxt<'tcx>,
+    typing_env: ty::TypingEnv<'tcx>,
+    /// `DefId` of caller.
+    def_id: LocalDefId,
+    /// Stack of inlined instances.
+    /// We only check the `DefId` and not the args because we want to
+    /// avoid inlining cases of polymorphic recursion.
+    /// The number of `DefId`s is finite, so checking history is enough
+    /// to ensure that we do not loop endlessly while inlining.
+    history: Vec<DefId>,
+    /// Indicates that the caller body has been modified.
+    changed: bool,
+    /// Indicates that the caller is #[inline] and just calls another function,
+    /// and thus we can inline less into it as it'll be inlined itself.
+    caller_is_inline_forwarder: bool,
+}
 
-                return Some(CallSite { callee, fn_sig, block: bb, source_info });
-            }
+impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
+    fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Self {
+        let typing_env = body.typing_env(tcx);
+        let codegen_fn_attrs = tcx.codegen_fn_attrs(def_id);
+
+        Self {
+            tcx,
+            typing_env,
+            def_id,
+            history: Vec::new(),
+            changed: false,
+            caller_is_inline_forwarder: matches!(
+                codegen_fn_attrs.inline,
+                InlineAttr::Hint | InlineAttr::Always | InlineAttr::Force { .. }
+            ) && body_is_forwarder(body),
         }
-
-        None
     }
 
-    /// Returns an error if inlining is not possible based on codegen attributes alone. A success
-    /// indicates that inlining decision should be based on other criteria.
-    fn check_codegen_attributes(
-        &self,
-        callsite: &CallSite<'tcx>,
-        callee_attrs: &CodegenFnAttrs,
-        cross_crate_inlinable: bool,
-    ) -> Result<(), &'static str> {
-        if self.tcx.has_attr(callsite.callee.def_id(), sym::rustc_no_mir_inline) {
-            return Err("#[rustc_no_mir_inline]");
-        }
+    fn tcx(&self) -> TyCtxt<'tcx> {
+        self.tcx
+    }
 
-        if let InlineAttr::Never = callee_attrs.inline {
-            return Err("never inline hint");
-        }
+    fn caller_def_id(&self) -> LocalDefId {
+        self.def_id
+    }
 
-        // Reachability pass defines which functions are eligible for inlining. Generally inlining
-        // other functions is incorrect because they could reference symbols that aren't exported.
-        let is_generic = callsite.callee.args.non_erasable_generics().next().is_some();
-        if !is_generic && !cross_crate_inlinable {
-            return Err("not exported");
-        }
+    fn typing_env(&self) -> ty::TypingEnv<'tcx> {
+        self.typing_env
+    }
 
-        if callsite.fn_sig.c_variadic() {
-            return Err("C variadic");
-        }
+    fn history(&self) -> &[DefId] {
+        &self.history
+    }
 
-        if callee_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
-            return Err("cold");
-        }
+    fn changed(self) -> bool {
+        self.changed
+    }
 
-        if callee_attrs.no_sanitize != self.codegen_fn_attrs.no_sanitize {
-            return Err("incompatible sanitizer set");
-        }
+    fn should_inline_for_callee(&self, _: DefId) -> bool {
+        true
+    }
 
-        // Two functions are compatible if the callee has no attribute (meaning
-        // that it's codegen agnostic), or sets an attribute that is identical
-        // to this function's attribute.
-        if callee_attrs.instruction_set.is_some()
-            && callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set
-        {
-            return Err("incompatible instruction set");
+    fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool {
+        if body.source.promoted.is_some() {
+            return false;
         }
 
-        let callee_feature_names = callee_attrs.target_features.iter().map(|f| f.name);
-        let this_feature_names = self.codegen_fn_attrs.target_features.iter().map(|f| f.name);
-        if callee_feature_names.ne(this_feature_names) {
-            // In general it is not correct to inline a callee with target features that are a
-            // subset of the caller. This is because the callee might contain calls, and the ABI of
-            // those calls depends on the target features of the surrounding function. By moving a
-            // `Call` terminator from one MIR body to another with more target features, we might
-            // change the ABI of that call!
-            return Err("incompatible target features");
+        // Avoid inlining into coroutines, since their `optimized_mir` is used for layout computation,
+        // which can create a cycle, even when no attempt is made to inline the function in the other
+        // direction.
+        if body.coroutine.is_some() {
+            return false;
         }
 
-        Ok(())
+        true
     }
 
-    /// Returns inlining decision that is based on the examination of callee MIR body.
-    /// Assumes that codegen attributes have been checked for compatibility already.
     #[instrument(level = "debug", skip(self, callee_body))]
-    fn check_mir_body(
+    fn check_callee_mir_body(
         &self,
         callsite: &CallSite<'tcx>,
         callee_body: &Body<'tcx>,
         callee_attrs: &CodegenFnAttrs,
         cross_crate_inlinable: bool,
     ) -> Result<(), &'static str> {
-        let tcx = self.tcx;
+        let tcx = self.tcx();
 
         if let Some(_) = callee_body.tainted_by_errors {
-            return Err("Body is tainted");
+            return Err("body has errors");
         }
 
         let mut threshold = if self.caller_is_inline_forwarder {
-            self.tcx.sess.opts.unstable_opts.inline_mir_forwarder_threshold.unwrap_or(30)
+            tcx.sess.opts.unstable_opts.inline_mir_forwarder_threshold.unwrap_or(30)
         } else if cross_crate_inlinable {
-            self.tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100)
+            tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100)
         } else {
-            self.tcx.sess.opts.unstable_opts.inline_mir_threshold.unwrap_or(50)
+            tcx.sess.opts.unstable_opts.inline_mir_threshold.unwrap_or(50)
         };
 
         // Give a bonus functions with a small number of blocks,
@@ -497,7 +352,7 @@ impl<'tcx> Inliner<'tcx> {
         // FIXME: Give a bonus to functions with only a single caller
 
         let mut checker =
-            CostChecker::new(self.tcx, self.typing_env, Some(callsite.callee), callee_body);
+            CostChecker::new(tcx, self.typing_env(), Some(callsite.callee), callee_body);
 
         checker.add_function_level_costs();
 
@@ -513,20 +368,20 @@ impl<'tcx> Inliner<'tcx> {
             checker.visit_basic_block_data(bb, blk);
 
             let term = blk.terminator();
+            let caller_attrs = tcx.codegen_fn_attrs(self.caller_def_id());
             if let TerminatorKind::Drop { ref place, target, unwind, replace: _ } = term.kind {
                 work_list.push(target);
 
                 // If the place doesn't actually need dropping, treat it like a regular goto.
-                let ty = callsite.callee.instantiate_mir(
-                    self.tcx,
-                    ty::EarlyBinder::bind(&place.ty(callee_body, tcx).ty),
-                );
-                if ty.needs_drop(tcx, self.typing_env)
+                let ty = callsite
+                    .callee
+                    .instantiate_mir(tcx, ty::EarlyBinder::bind(&place.ty(callee_body, tcx).ty));
+                if ty.needs_drop(tcx, self.typing_env())
                     && let UnwindAction::Cleanup(unwind) = unwind
                 {
                     work_list.push(unwind);
                 }
-            } else if callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set
+            } else if callee_attrs.instruction_set != caller_attrs.instruction_set
                 && matches!(term.kind, TerminatorKind::InlineAsm { .. })
             {
                 // During the attribute checking stage we allow a callee with no
@@ -534,7 +389,7 @@ impl<'tcx> Inliner<'tcx> {
                 // assign one. However, during this stage we require an exact match when any
                 // inline-asm is detected. LLVM will still possibly do an inline later on
                 // if the no-attribute function ends up with the same instruction set anyway.
-                return Err("Cannot move inline-asm across instruction sets");
+                return Err("cannot move inline-asm across instruction sets");
             } else if let TerminatorKind::TailCall { .. } = term.kind {
                 // FIXME(explicit_tail_calls): figure out how exactly functions containing tail
                 // calls can be inlined (and if they even should)
@@ -558,321 +413,710 @@ impl<'tcx> Inliner<'tcx> {
         }
     }
 
-    fn inline_call(
-        &self,
-        caller_body: &mut Body<'tcx>,
+    fn inline_limit_for_block(&self) -> Option<usize> {
+        match self.history.len() {
+            0 => Some(usize::MAX),
+            1..=TOP_DOWN_DEPTH_LIMIT => Some(1),
+            _ => None,
+        }
+    }
+
+    fn on_inline_success(
+        &mut self,
         callsite: &CallSite<'tcx>,
-        mut callee_body: Body<'tcx>,
+        caller_body: &mut Body<'tcx>,
+        new_blocks: std::ops::Range<BasicBlock>,
     ) {
-        let terminator = caller_body[callsite.block].terminator.take().unwrap();
-        let TerminatorKind::Call { func, args, destination, unwind, target, .. } = terminator.kind
-        else {
-            bug!("unexpected terminator kind {:?}", terminator.kind);
-        };
+        self.changed = true;
 
-        let return_block = if let Some(block) = target {
-            // Prepare a new block for code that should execute when call returns. We don't use
-            // target block directly since it might have other predecessors.
-            let data = BasicBlockData::new(
-                Some(Terminator {
-                    source_info: terminator.source_info,
-                    kind: TerminatorKind::Goto { target: block },
-                }),
-                caller_body[block].is_cleanup,
-            );
-            Some(caller_body.basic_blocks_mut().push(data))
-        } else {
-            None
+        self.history.push(callsite.callee.def_id());
+        process_blocks(self, caller_body, new_blocks);
+        self.history.pop();
+    }
+
+    fn on_inline_limit_reached(&self) -> bool {
+        true
+    }
+
+    fn on_inline_failure(&self, _: &CallSite<'tcx>, _: &'static str) {}
+}
+
+fn inline<'tcx, T: Inliner<'tcx>>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool {
+    let def_id = body.source.def_id().expect_local();
+
+    // Only do inlining into fn bodies.
+    if !tcx.hir().body_owner_kind(def_id).is_fn_or_closure() {
+        return false;
+    }
+
+    let mut inliner = T::new(tcx, def_id, body);
+    if !inliner.check_caller_mir_body(body) {
+        return false;
+    }
+
+    let blocks = START_BLOCK..body.basic_blocks.next_index();
+    process_blocks(&mut inliner, body, blocks);
+    inliner.changed()
+}
+
+fn process_blocks<'tcx, I: Inliner<'tcx>>(
+    inliner: &mut I,
+    caller_body: &mut Body<'tcx>,
+    blocks: Range<BasicBlock>,
+) {
+    let Some(inline_limit) = inliner.inline_limit_for_block() else { return };
+    let mut inlined_count = 0;
+    for bb in blocks {
+        let bb_data = &caller_body[bb];
+        if bb_data.is_cleanup {
+            continue;
+        }
+
+        let Some(callsite) = resolve_callsite(inliner, caller_body, bb, bb_data) else {
+            continue;
         };
 
-        // If the call is something like `a[*i] = f(i)`, where
-        // `i : &mut usize`, then just duplicating the `a[*i]`
-        // Place could result in two different locations if `f`
-        // writes to `i`. To prevent this we need to create a temporary
-        // borrow of the place and pass the destination as `*temp` instead.
-        fn dest_needs_borrow(place: Place<'_>) -> bool {
-            for elem in place.projection.iter() {
-                match elem {
-                    ProjectionElem::Deref | ProjectionElem::Index(_) => return true,
-                    _ => {}
+        let span = trace_span!("process_blocks", %callsite.callee, ?bb);
+        let _guard = span.enter();
+
+        if !inliner.should_inline_for_callee(callsite.callee.def_id()) {
+            debug!("not enabled");
+            continue;
+        }
+
+        match try_inlining(inliner, caller_body, &callsite) {
+            Err(reason) => {
+                debug!("not-inlined {} [{}]", callsite.callee, reason);
+                inliner.on_inline_failure(&callsite, reason);
+            }
+            Ok(new_blocks) => {
+                debug!("inlined {}", callsite.callee);
+                inliner.on_inline_success(&callsite, caller_body, new_blocks);
+
+                inlined_count += 1;
+                if inlined_count == inline_limit {
+                    if inliner.on_inline_limit_reached() {
+                        return;
+                    }
                 }
             }
+        }
+    }
+}
+
+fn resolve_callsite<'tcx, I: Inliner<'tcx>>(
+    inliner: &I,
+    caller_body: &Body<'tcx>,
+    bb: BasicBlock,
+    bb_data: &BasicBlockData<'tcx>,
+) -> Option<CallSite<'tcx>> {
+    let tcx = inliner.tcx();
+    // Only consider direct calls to functions
+    let terminator = bb_data.terminator();
+
+    // FIXME(explicit_tail_calls): figure out if we can inline tail calls
+    if let TerminatorKind::Call { ref func, fn_span, .. } = terminator.kind {
+        let func_ty = func.ty(caller_body, tcx);
+        if let ty::FnDef(def_id, args) = *func_ty.kind() {
+            // To resolve an instance its args have to be fully normalized.
+            let args = tcx.try_normalize_erasing_regions(inliner.typing_env(), args).ok()?;
+            let callee =
+                Instance::try_resolve(tcx, inliner.typing_env(), def_id, args).ok().flatten()?;
+
+            if let InstanceKind::Virtual(..) | InstanceKind::Intrinsic(_) = callee.def {
+                return None;
+            }
 
-            false
+            if inliner.history().contains(&callee.def_id()) {
+                return None;
+            }
+
+            let fn_sig = tcx.fn_sig(def_id).instantiate(tcx, args);
+
+            // Additionally, check that the body that we're inlining actually agrees
+            // with the ABI of the trait that the item comes from.
+            if let InstanceKind::Item(instance_def_id) = callee.def
+                && tcx.def_kind(instance_def_id) == DefKind::AssocFn
+                && let instance_fn_sig = tcx.fn_sig(instance_def_id).skip_binder()
+                && instance_fn_sig.abi() != fn_sig.abi()
+            {
+                return None;
+            }
+
+            let source_info = SourceInfo { span: fn_span, ..terminator.source_info };
+
+            return Some(CallSite { callee, fn_sig, block: bb, source_info });
         }
+    }
 
-        let dest = if dest_needs_borrow(destination) {
-            trace!("creating temp for return destination");
-            let dest = Rvalue::Ref(
-                self.tcx.lifetimes.re_erased,
-                BorrowKind::Mut { kind: MutBorrowKind::Default },
-                destination,
-            );
-            let dest_ty = dest.ty(caller_body, self.tcx);
-            let temp =
-                Place::from(self.new_call_temp(caller_body, callsite, dest_ty, return_block));
-            caller_body[callsite.block].statements.push(Statement {
-                source_info: callsite.source_info,
-                kind: StatementKind::Assign(Box::new((temp, dest))),
-            });
-            self.tcx.mk_place_deref(temp)
-        } else {
-            destination
-        };
+    None
+}
 
-        // Always create a local to hold the destination, as `RETURN_PLACE` may appear
-        // where a full `Place` is not allowed.
-        let (remap_destination, destination_local) = if let Some(d) = dest.as_local() {
-            (false, d)
-        } else {
-            (
-                true,
-                self.new_call_temp(
-                    caller_body,
-                    callsite,
-                    destination.ty(caller_body, self.tcx).ty,
-                    return_block,
-                ),
-            )
-        };
+/// Attempts to inline a callsite into the caller body. When successful returns basic blocks
+/// containing the inlined body. Otherwise returns an error describing why inlining didn't take
+/// place.
+fn try_inlining<'tcx, I: Inliner<'tcx>>(
+    inliner: &I,
+    caller_body: &mut Body<'tcx>,
+    callsite: &CallSite<'tcx>,
+) -> Result<std::ops::Range<BasicBlock>, &'static str> {
+    let tcx = inliner.tcx();
+    check_mir_is_available(inliner, caller_body, callsite.callee)?;
+
+    let callee_attrs = tcx.codegen_fn_attrs(callsite.callee.def_id());
+    let cross_crate_inlinable = tcx.cross_crate_inlinable(callsite.callee.def_id());
+    check_codegen_attributes(inliner, callsite, callee_attrs, cross_crate_inlinable)?;
+
+    // Intrinsic fallback bodies are automatically made cross-crate inlineable,
+    // but at this stage we don't know whether codegen knows the intrinsic,
+    // so just conservatively don't inline it. This also ensures that we do not
+    // accidentally inline the body of an intrinsic that *must* be overridden.
+    if tcx.has_attr(callsite.callee.def_id(), sym::rustc_intrinsic) {
+        return Err("callee is an intrinsic");
+    }
 
-        // Copy the arguments if needed.
-        let args = self.make_call_args(args, callsite, caller_body, &callee_body, return_block);
+    let terminator = caller_body[callsite.block].terminator.as_ref().unwrap();
+    let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() };
+    let destination_ty = destination.ty(&caller_body.local_decls, tcx).ty;
+    for arg in args {
+        if !arg.node.ty(&caller_body.local_decls, tcx).is_sized(tcx, inliner.typing_env()) {
+            // We do not allow inlining functions with unsized params. Inlining these functions
+            // could create unsized locals, which are unsound and being phased out.
+            return Err("call has unsized argument");
+        }
+    }
 
-        let mut integrator = Integrator {
-            args: &args,
-            new_locals: Local::new(caller_body.local_decls.len())..,
-            new_scopes: SourceScope::new(caller_body.source_scopes.len())..,
-            new_blocks: BasicBlock::new(caller_body.basic_blocks.len())..,
-            destination: destination_local,
-            callsite_scope: caller_body.source_scopes[callsite.source_info.scope].clone(),
-            callsite,
-            cleanup_block: unwind,
-            in_cleanup_block: false,
-            return_block,
-            tcx: self.tcx,
-            always_live_locals: BitSet::new_filled(callee_body.local_decls.len()),
+    let callee_body = try_instance_mir(tcx, callsite.callee.def)?;
+    inliner.check_callee_mir_body(callsite, callee_body, callee_attrs, cross_crate_inlinable)?;
+
+    let Ok(callee_body) = callsite.callee.try_instantiate_mir_and_normalize_erasing_regions(
+        tcx,
+        inliner.typing_env(),
+        ty::EarlyBinder::bind(callee_body.clone()),
+    ) else {
+        debug!("failed to normalize callee body");
+        return Err("implementation limitation");
+    };
+
+    // Normally, this shouldn't be required, but trait normalization failure can create a
+    // validation ICE.
+    if !validate_types(tcx, inliner.typing_env(), &callee_body, &caller_body).is_empty() {
+        debug!("failed to validate callee body");
+        return Err("implementation limitation");
+    }
+
+    // Check call signature compatibility.
+    // Normally, this shouldn't be required, but trait normalization failure can create a
+    // validation ICE.
+    let output_type = callee_body.return_ty();
+    if !util::sub_types(tcx, inliner.typing_env(), output_type, destination_ty) {
+        trace!(?output_type, ?destination_ty);
+        debug!("failed to normalize return type");
+        return Err("implementation limitation");
+    }
+    if callsite.fn_sig.abi() == ExternAbi::RustCall {
+        // FIXME: Don't inline user-written `extern "rust-call"` functions,
+        // since this is generally perf-negative on rustc, and we hope that
+        // LLVM will inline these functions instead.
+        if callee_body.spread_arg.is_some() {
+            return Err("user-written rust-call functions");
+        }
+
+        let (self_arg, arg_tuple) = match &args[..] {
+            [arg_tuple] => (None, arg_tuple),
+            [self_arg, arg_tuple] => (Some(self_arg), arg_tuple),
+            _ => bug!("Expected `rust-call` to have 1 or 2 args"),
         };
 
-        // Map all `Local`s, `SourceScope`s and `BasicBlock`s to new ones
-        // (or existing ones, in a few special cases) in the caller.
-        integrator.visit_body(&mut callee_body);
+        let self_arg_ty = self_arg.map(|self_arg| self_arg.node.ty(&caller_body.local_decls, tcx));
 
-        // If there are any locals without storage markers, give them storage only for the
-        // duration of the call.
-        for local in callee_body.vars_and_temps_iter() {
-            if integrator.always_live_locals.contains(local) {
-                let new_local = integrator.map_local(local);
-                caller_body[callsite.block].statements.push(Statement {
-                    source_info: callsite.source_info,
-                    kind: StatementKind::StorageLive(new_local),
-                });
+        let arg_tuple_ty = arg_tuple.node.ty(&caller_body.local_decls, tcx);
+        let ty::Tuple(arg_tuple_tys) = *arg_tuple_ty.kind() else {
+            bug!("Closure arguments are not passed as a tuple");
+        };
+
+        for (arg_ty, input) in
+            self_arg_ty.into_iter().chain(arg_tuple_tys).zip(callee_body.args_iter())
+        {
+            let input_type = callee_body.local_decls[input].ty;
+            if !util::sub_types(tcx, inliner.typing_env(), input_type, arg_ty) {
+                trace!(?arg_ty, ?input_type);
+                debug!("failed to normalize tuple argument type");
+                return Err("implementation limitation");
             }
         }
-        if let Some(block) = return_block {
-            // To avoid repeated O(n) insert, push any new statements to the end and rotate
-            // the slice once.
-            let mut n = 0;
-            if remap_destination {
-                caller_body[block].statements.push(Statement {
-                    source_info: callsite.source_info,
-                    kind: StatementKind::Assign(Box::new((
-                        dest,
-                        Rvalue::Use(Operand::Move(destination_local.into())),
-                    ))),
-                });
-                n += 1;
+    } else {
+        for (arg, input) in args.iter().zip(callee_body.args_iter()) {
+            let input_type = callee_body.local_decls[input].ty;
+            let arg_ty = arg.node.ty(&caller_body.local_decls, tcx);
+            if !util::sub_types(tcx, inliner.typing_env(), input_type, arg_ty) {
+                trace!(?arg_ty, ?input_type);
+                debug!("failed to normalize argument type");
+                return Err("implementation limitation");
             }
-            for local in callee_body.vars_and_temps_iter().rev() {
-                if integrator.always_live_locals.contains(local) {
-                    let new_local = integrator.map_local(local);
-                    caller_body[block].statements.push(Statement {
-                        source_info: callsite.source_info,
-                        kind: StatementKind::StorageDead(new_local),
-                    });
-                    n += 1;
-                }
+        }
+    }
+
+    let old_blocks = caller_body.basic_blocks.next_index();
+    inline_call(inliner, caller_body, callsite, callee_body);
+    let new_blocks = old_blocks..caller_body.basic_blocks.next_index();
+
+    Ok(new_blocks)
+}
+
+fn check_mir_is_available<'tcx, I: Inliner<'tcx>>(
+    inliner: &I,
+    caller_body: &Body<'tcx>,
+    callee: Instance<'tcx>,
+) -> Result<(), &'static str> {
+    let caller_def_id = caller_body.source.def_id();
+    let callee_def_id = callee.def_id();
+    if callee_def_id == caller_def_id {
+        return Err("self-recursion");
+    }
+
+    match callee.def {
+        InstanceKind::Item(_) => {
+            // If there is no MIR available (either because it was not in metadata or
+            // because it has no MIR because it's an extern function), then the inliner
+            // won't cause cycles on this.
+            if !inliner.tcx().is_mir_available(callee_def_id) {
+                debug!("item MIR unavailable");
+                return Err("implementation limitation");
             }
-            caller_body[block].statements.rotate_right(n);
+        }
+        // These have no own callable MIR.
+        InstanceKind::Intrinsic(_) | InstanceKind::Virtual(..) => {
+            debug!("instance without MIR (intrinsic / virtual)");
+            return Err("implementation limitation");
         }
 
-        // Insert all of the (mapped) parts of the callee body into the caller.
-        caller_body.local_decls.extend(callee_body.drain_vars_and_temps());
-        caller_body.source_scopes.append(&mut callee_body.source_scopes);
-        if self
-            .tcx
-            .sess
-            .opts
-            .unstable_opts
-            .inline_mir_preserve_debug
-            .unwrap_or(self.tcx.sess.opts.debuginfo != DebugInfo::None)
-        {
-            // Note that we need to preserve these in the standard library so that
-            // people working on rust can build with or without debuginfo while
-            // still getting consistent results from the mir-opt tests.
-            caller_body.var_debug_info.append(&mut callee_body.var_debug_info);
+        // FIXME(#127030): `ConstParamHasTy` has bad interactions with
+        // the drop shim builder, which does not evaluate predicates in
+        // the correct param-env for types being dropped. Stall resolving
+        // the MIR for this instance until all of its const params are
+        // substituted.
+        InstanceKind::DropGlue(_, Some(ty)) if ty.has_type_flags(TypeFlags::HAS_CT_PARAM) => {
+            debug!("still needs substitution");
+            return Err("implementation limitation");
         }
-        caller_body.basic_blocks_mut().append(callee_body.basic_blocks_mut());
 
-        caller_body[callsite.block].terminator = Some(Terminator {
-            source_info: callsite.source_info,
-            kind: TerminatorKind::Goto { target: integrator.map_block(START_BLOCK) },
-        });
+        // This cannot result in an immediate cycle since the callee MIR is a shim, which does
+        // not get any optimizations run on it. Any subsequent inlining may cause cycles, but we
+        // do not need to catch this here, we can wait until the inliner decides to continue
+        // inlining a second time.
+        InstanceKind::VTableShim(_)
+        | InstanceKind::ReifyShim(..)
+        | InstanceKind::FnPtrShim(..)
+        | InstanceKind::ClosureOnceShim { .. }
+        | InstanceKind::ConstructCoroutineInClosureShim { .. }
+        | InstanceKind::DropGlue(..)
+        | InstanceKind::CloneShim(..)
+        | InstanceKind::ThreadLocalShim(..)
+        | InstanceKind::FnPtrAddrShim(..)
+        | InstanceKind::AsyncDropGlueCtorShim(..) => return Ok(()),
+    }
 
-        // Copy required constants from the callee_body into the caller_body. Although we are only
-        // pushing unevaluated consts to `required_consts`, here they may have been evaluated
-        // because we are calling `instantiate_and_normalize_erasing_regions` -- so we filter again.
-        caller_body.required_consts.as_mut().unwrap().extend(
-            callee_body.required_consts().into_iter().filter(|ct| ct.const_.is_required_const()),
-        );
-        // Now that we incorporated the callee's `required_consts`, we can remove the callee from
-        // `mentioned_items` -- but we have to take their `mentioned_items` in return. This does
-        // some extra work here to save the monomorphization collector work later. It helps a lot,
-        // since monomorphization can avoid a lot of work when the "mentioned items" are similar to
-        // the actually used items. By doing this we can entirely avoid visiting the callee!
-        // We need to reconstruct the `required_item` for the callee so that we can find and
-        // remove it.
-        let callee_item = MentionedItem::Fn(func.ty(caller_body, self.tcx));
-        let caller_mentioned_items = caller_body.mentioned_items.as_mut().unwrap();
-        if let Some(idx) = caller_mentioned_items.iter().position(|item| item.node == callee_item) {
-            // We found the callee, so remove it and add its items instead.
-            caller_mentioned_items.remove(idx);
-            caller_mentioned_items.extend(callee_body.mentioned_items());
-        } else {
-            // If we can't find the callee, there's no point in adding its items. Probably it
-            // already got removed by being inlined elsewhere in the same function, so we already
-            // took its items.
+    if inliner.tcx().is_constructor(callee_def_id) {
+        trace!("constructors always have MIR");
+        // Constructor functions cannot cause a query cycle.
+        return Ok(());
+    }
+
+    if callee_def_id.is_local() {
+        // If we know for sure that the function we're calling will itself try to
+        // call us, then we avoid inlining that function.
+        if inliner.tcx().mir_callgraph_reachable((callee, caller_def_id.expect_local())) {
+            debug!("query cycle avoidance");
+            return Err("caller might be reachable from callee");
         }
+
+        Ok(())
+    } else {
+        // This cannot result in an immediate cycle since the callee MIR is from another crate
+        // and is already optimized. Any subsequent inlining may cause cycles, but we do
+        // not need to catch this here, we can wait until the inliner decides to continue
+        // inlining a second time.
+        trace!("functions from other crates always have MIR");
+        Ok(())
     }
+}
 
-    fn make_call_args(
-        &self,
-        args: Box<[Spanned<Operand<'tcx>>]>,
-        callsite: &CallSite<'tcx>,
-        caller_body: &mut Body<'tcx>,
-        callee_body: &Body<'tcx>,
-        return_block: Option<BasicBlock>,
-    ) -> Box<[Local]> {
-        let tcx = self.tcx;
-
-        // There is a bit of a mismatch between the *caller* of a closure and the *callee*.
-        // The caller provides the arguments wrapped up in a tuple:
-        //
-        //     tuple_tmp = (a, b, c)
-        //     Fn::call(closure_ref, tuple_tmp)
-        //
-        // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`)
-        // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has
-        // the job of unpacking this tuple. But here, we are codegen. =) So we want to create
-        // a vector like
-        //
-        //     [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2]
-        //
-        // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient
-        // if we "spill" that into *another* temporary, so that we can map the argument
-        // variable in the callee MIR directly to an argument variable on our side.
-        // So we introduce temporaries like:
-        //
-        //     tmp0 = tuple_tmp.0
-        //     tmp1 = tuple_tmp.1
-        //     tmp2 = tuple_tmp.2
-        //
-        // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`.
-        if callsite.fn_sig.abi() == ExternAbi::RustCall && callee_body.spread_arg.is_none() {
-            // FIXME(edition_2024): switch back to a normal method call.
-            let mut args = <_>::into_iter(args);
-            let self_ = self.create_temp_if_necessary(
-                args.next().unwrap().node,
-                callsite,
-                caller_body,
-                return_block,
-            );
-            let tuple = self.create_temp_if_necessary(
-                args.next().unwrap().node,
-                callsite,
-                caller_body,
-                return_block,
-            );
-            assert!(args.next().is_none());
-
-            let tuple = Place::from(tuple);
-            let ty::Tuple(tuple_tys) = tuple.ty(caller_body, tcx).ty.kind() else {
-                bug!("Closure arguments are not passed as a tuple");
-            };
+/// Returns an error if inlining is not possible based on codegen attributes alone. A success
+/// indicates that inlining decision should be based on other criteria.
+fn check_codegen_attributes<'tcx, I: Inliner<'tcx>>(
+    inliner: &I,
+    callsite: &CallSite<'tcx>,
+    callee_attrs: &CodegenFnAttrs,
+    cross_crate_inlinable: bool,
+) -> Result<(), &'static str> {
+    let tcx = inliner.tcx();
+    if tcx.has_attr(callsite.callee.def_id(), sym::rustc_no_mir_inline) {
+        return Err("#[rustc_no_mir_inline]");
+    }
 
-            // The `closure_ref` in our example above.
-            let closure_ref_arg = iter::once(self_);
+    if let InlineAttr::Never = callee_attrs.inline {
+        return Err("never inline attribute");
+    }
 
-            // The `tmp0`, `tmp1`, and `tmp2` in our example above.
-            let tuple_tmp_args = tuple_tys.iter().enumerate().map(|(i, ty)| {
-                // This is e.g., `tuple_tmp.0` in our example above.
-                let tuple_field = Operand::Move(tcx.mk_place_field(tuple, FieldIdx::new(i), ty));
+    // FIXME(#127234): Coverage instrumentation currently doesn't handle inlined
+    // MIR correctly when Modified Condition/Decision Coverage is enabled.
+    if tcx.sess.instrument_coverage_mcdc() {
+        return Err("incompatible with MC/DC coverage");
+    }
 
-                // Spill to a local to make e.g., `tmp0`.
-                self.create_temp_if_necessary(tuple_field, callsite, caller_body, return_block)
-            });
+    // Reachability pass defines which functions are eligible for inlining. Generally inlining
+    // other functions is incorrect because they could reference symbols that aren't exported.
+    let is_generic = callsite.callee.args.non_erasable_generics().next().is_some();
+    if !is_generic && !cross_crate_inlinable {
+        return Err("not exported");
+    }
 
-            closure_ref_arg.chain(tuple_tmp_args).collect()
-        } else {
-            // FIXME(edition_2024): switch back to a normal method call.
-            <_>::into_iter(args)
-                .map(|a| self.create_temp_if_necessary(a.node, callsite, caller_body, return_block))
-                .collect()
-        }
+    if callsite.fn_sig.c_variadic() {
+        return Err("C variadic");
     }
 
-    /// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh
-    /// temporary `T` and an instruction `T = arg`, and returns `T`.
-    fn create_temp_if_necessary(
-        &self,
-        arg: Operand<'tcx>,
-        callsite: &CallSite<'tcx>,
-        caller_body: &mut Body<'tcx>,
-        return_block: Option<BasicBlock>,
-    ) -> Local {
-        // Reuse the operand if it is a moved temporary.
-        if let Operand::Move(place) = &arg
-            && let Some(local) = place.as_local()
-            && caller_body.local_kind(local) == LocalKind::Temp
-        {
-            return local;
-        }
+    if callee_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
+        return Err("cold");
+    }
 
-        // Otherwise, create a temporary for the argument.
-        trace!("creating temp for argument {:?}", arg);
-        let arg_ty = arg.ty(caller_body, self.tcx);
-        let local = self.new_call_temp(caller_body, callsite, arg_ty, return_block);
-        caller_body[callsite.block].statements.push(Statement {
-            source_info: callsite.source_info,
-            kind: StatementKind::Assign(Box::new((Place::from(local), Rvalue::Use(arg)))),
-        });
-        local
+    let codegen_fn_attrs = tcx.codegen_fn_attrs(inliner.caller_def_id());
+    if callee_attrs.no_sanitize != codegen_fn_attrs.no_sanitize {
+        return Err("incompatible sanitizer set");
     }
 
-    /// Introduces a new temporary into the caller body that is live for the duration of the call.
-    fn new_call_temp(
-        &self,
-        caller_body: &mut Body<'tcx>,
-        callsite: &CallSite<'tcx>,
-        ty: Ty<'tcx>,
-        return_block: Option<BasicBlock>,
-    ) -> Local {
-        let local = caller_body.local_decls.push(LocalDecl::new(ty, callsite.source_info.span));
+    // Two functions are compatible if the callee has no attribute (meaning
+    // that it's codegen agnostic), or sets an attribute that is identical
+    // to this function's attribute.
+    if callee_attrs.instruction_set.is_some()
+        && callee_attrs.instruction_set != codegen_fn_attrs.instruction_set
+    {
+        return Err("incompatible instruction set");
+    }
+
+    let callee_feature_names = callee_attrs.target_features.iter().map(|f| f.name);
+    let this_feature_names = codegen_fn_attrs.target_features.iter().map(|f| f.name);
+    if callee_feature_names.ne(this_feature_names) {
+        // In general it is not correct to inline a callee with target features that are a
+        // subset of the caller. This is because the callee might contain calls, and the ABI of
+        // those calls depends on the target features of the surrounding function. By moving a
+        // `Call` terminator from one MIR body to another with more target features, we might
+        // change the ABI of that call!
+        return Err("incompatible target features");
+    }
 
+    Ok(())
+}
+
+fn inline_call<'tcx, I: Inliner<'tcx>>(
+    inliner: &I,
+    caller_body: &mut Body<'tcx>,
+    callsite: &CallSite<'tcx>,
+    mut callee_body: Body<'tcx>,
+) {
+    let tcx = inliner.tcx();
+    let terminator = caller_body[callsite.block].terminator.take().unwrap();
+    let TerminatorKind::Call { func, args, destination, unwind, target, .. } = terminator.kind
+    else {
+        bug!("unexpected terminator kind {:?}", terminator.kind);
+    };
+
+    let return_block = if let Some(block) = target {
+        // Prepare a new block for code that should execute when call returns. We don't use
+        // target block directly since it might have other predecessors.
+        let data = BasicBlockData::new(
+            Some(Terminator {
+                source_info: terminator.source_info,
+                kind: TerminatorKind::Goto { target: block },
+            }),
+            caller_body[block].is_cleanup,
+        );
+        Some(caller_body.basic_blocks_mut().push(data))
+    } else {
+        None
+    };
+
+    // If the call is something like `a[*i] = f(i)`, where
+    // `i : &mut usize`, then just duplicating the `a[*i]`
+    // Place could result in two different locations if `f`
+    // writes to `i`. To prevent this we need to create a temporary
+    // borrow of the place and pass the destination as `*temp` instead.
+    fn dest_needs_borrow(place: Place<'_>) -> bool {
+        for elem in place.projection.iter() {
+            match elem {
+                ProjectionElem::Deref | ProjectionElem::Index(_) => return true,
+                _ => {}
+            }
+        }
+
+        false
+    }
+
+    let dest = if dest_needs_borrow(destination) {
+        trace!("creating temp for return destination");
+        let dest = Rvalue::Ref(
+            tcx.lifetimes.re_erased,
+            BorrowKind::Mut { kind: MutBorrowKind::Default },
+            destination,
+        );
+        let dest_ty = dest.ty(caller_body, tcx);
+        let temp = Place::from(new_call_temp(caller_body, callsite, dest_ty, return_block));
         caller_body[callsite.block].statements.push(Statement {
             source_info: callsite.source_info,
-            kind: StatementKind::StorageLive(local),
+            kind: StatementKind::Assign(Box::new((temp, dest))),
         });
+        tcx.mk_place_deref(temp)
+    } else {
+        destination
+    };
+
+    // Always create a local to hold the destination, as `RETURN_PLACE` may appear
+    // where a full `Place` is not allowed.
+    let (remap_destination, destination_local) = if let Some(d) = dest.as_local() {
+        (false, d)
+    } else {
+        (
+            true,
+            new_call_temp(caller_body, callsite, destination.ty(caller_body, tcx).ty, return_block),
+        )
+    };
 
-        if let Some(block) = return_block {
-            caller_body[block].statements.insert(0, Statement {
+    // Copy the arguments if needed.
+    let args = make_call_args(inliner, args, callsite, caller_body, &callee_body, return_block);
+
+    let mut integrator = Integrator {
+        args: &args,
+        new_locals: Local::new(caller_body.local_decls.len())..,
+        new_scopes: SourceScope::new(caller_body.source_scopes.len())..,
+        new_blocks: BasicBlock::new(caller_body.basic_blocks.len())..,
+        destination: destination_local,
+        callsite_scope: caller_body.source_scopes[callsite.source_info.scope].clone(),
+        callsite,
+        cleanup_block: unwind,
+        in_cleanup_block: false,
+        return_block,
+        tcx,
+        always_live_locals: BitSet::new_filled(callee_body.local_decls.len()),
+    };
+
+    // Map all `Local`s, `SourceScope`s and `BasicBlock`s to new ones
+    // (or existing ones, in a few special cases) in the caller.
+    integrator.visit_body(&mut callee_body);
+
+    // If there are any locals without storage markers, give them storage only for the
+    // duration of the call.
+    for local in callee_body.vars_and_temps_iter() {
+        if integrator.always_live_locals.contains(local) {
+            let new_local = integrator.map_local(local);
+            caller_body[callsite.block].statements.push(Statement {
                 source_info: callsite.source_info,
-                kind: StatementKind::StorageDead(local),
+                kind: StatementKind::StorageLive(new_local),
             });
         }
+    }
+    if let Some(block) = return_block {
+        // To avoid repeated O(n) insert, push any new statements to the end and rotate
+        // the slice once.
+        let mut n = 0;
+        if remap_destination {
+            caller_body[block].statements.push(Statement {
+                source_info: callsite.source_info,
+                kind: StatementKind::Assign(Box::new((
+                    dest,
+                    Rvalue::Use(Operand::Move(destination_local.into())),
+                ))),
+            });
+            n += 1;
+        }
+        for local in callee_body.vars_and_temps_iter().rev() {
+            if integrator.always_live_locals.contains(local) {
+                let new_local = integrator.map_local(local);
+                caller_body[block].statements.push(Statement {
+                    source_info: callsite.source_info,
+                    kind: StatementKind::StorageDead(new_local),
+                });
+                n += 1;
+            }
+        }
+        caller_body[block].statements.rotate_right(n);
+    }
+
+    // Insert all of the (mapped) parts of the callee body into the caller.
+    caller_body.local_decls.extend(callee_body.drain_vars_and_temps());
+    caller_body.source_scopes.append(&mut callee_body.source_scopes);
+    if tcx
+        .sess
+        .opts
+        .unstable_opts
+        .inline_mir_preserve_debug
+        .unwrap_or(tcx.sess.opts.debuginfo != DebugInfo::None)
+    {
+        // Note that we need to preserve these in the standard library so that
+        // people working on rust can build with or without debuginfo while
+        // still getting consistent results from the mir-opt tests.
+        caller_body.var_debug_info.append(&mut callee_body.var_debug_info);
+    }
+    caller_body.basic_blocks_mut().append(callee_body.basic_blocks_mut());
+
+    caller_body[callsite.block].terminator = Some(Terminator {
+        source_info: callsite.source_info,
+        kind: TerminatorKind::Goto { target: integrator.map_block(START_BLOCK) },
+    });
+
+    // Copy required constants from the callee_body into the caller_body. Although we are only
+    // pushing unevaluated consts to `required_consts`, here they may have been evaluated
+    // because we are calling `instantiate_and_normalize_erasing_regions` -- so we filter again.
+    caller_body.required_consts.as_mut().unwrap().extend(
+        callee_body.required_consts().into_iter().filter(|ct| ct.const_.is_required_const()),
+    );
+    // Now that we incorporated the callee's `required_consts`, we can remove the callee from
+    // `mentioned_items` -- but we have to take their `mentioned_items` in return. This does
+    // some extra work here to save the monomorphization collector work later. It helps a lot,
+    // since monomorphization can avoid a lot of work when the "mentioned items" are similar to
+    // the actually used items. By doing this we can entirely avoid visiting the callee!
+    // We need to reconstruct the `required_item` for the callee so that we can find and
+    // remove it.
+    let callee_item = MentionedItem::Fn(func.ty(caller_body, tcx));
+    let caller_mentioned_items = caller_body.mentioned_items.as_mut().unwrap();
+    if let Some(idx) = caller_mentioned_items.iter().position(|item| item.node == callee_item) {
+        // We found the callee, so remove it and add its items instead.
+        caller_mentioned_items.remove(idx);
+        caller_mentioned_items.extend(callee_body.mentioned_items());
+    } else {
+        // If we can't find the callee, there's no point in adding its items. Probably it
+        // already got removed by being inlined elsewhere in the same function, so we already
+        // took its items.
+    }
+}
+
+fn make_call_args<'tcx, I: Inliner<'tcx>>(
+    inliner: &I,
+    args: Box<[Spanned<Operand<'tcx>>]>,
+    callsite: &CallSite<'tcx>,
+    caller_body: &mut Body<'tcx>,
+    callee_body: &Body<'tcx>,
+    return_block: Option<BasicBlock>,
+) -> Box<[Local]> {
+    let tcx = inliner.tcx();
+
+    // There is a bit of a mismatch between the *caller* of a closure and the *callee*.
+    // The caller provides the arguments wrapped up in a tuple:
+    //
+    //     tuple_tmp = (a, b, c)
+    //     Fn::call(closure_ref, tuple_tmp)
+    //
+    // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`)
+    // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has
+    // the job of unpacking this tuple. But here, we are codegen. =) So we want to create
+    // a vector like
+    //
+    //     [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2]
+    //
+    // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient
+    // if we "spill" that into *another* temporary, so that we can map the argument
+    // variable in the callee MIR directly to an argument variable on our side.
+    // So we introduce temporaries like:
+    //
+    //     tmp0 = tuple_tmp.0
+    //     tmp1 = tuple_tmp.1
+    //     tmp2 = tuple_tmp.2
+    //
+    // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`.
+    if callsite.fn_sig.abi() == ExternAbi::RustCall && callee_body.spread_arg.is_none() {
+        // FIXME(edition_2024): switch back to a normal method call.
+        let mut args = <_>::into_iter(args);
+        let self_ = create_temp_if_necessary(
+            inliner,
+            args.next().unwrap().node,
+            callsite,
+            caller_body,
+            return_block,
+        );
+        let tuple = create_temp_if_necessary(
+            inliner,
+            args.next().unwrap().node,
+            callsite,
+            caller_body,
+            return_block,
+        );
+        assert!(args.next().is_none());
+
+        let tuple = Place::from(tuple);
+        let ty::Tuple(tuple_tys) = tuple.ty(caller_body, tcx).ty.kind() else {
+            bug!("Closure arguments are not passed as a tuple");
+        };
+
+        // The `closure_ref` in our example above.
+        let closure_ref_arg = iter::once(self_);
+
+        // The `tmp0`, `tmp1`, and `tmp2` in our example above.
+        let tuple_tmp_args = tuple_tys.iter().enumerate().map(|(i, ty)| {
+            // This is e.g., `tuple_tmp.0` in our example above.
+            let tuple_field = Operand::Move(tcx.mk_place_field(tuple, FieldIdx::new(i), ty));
+
+            // Spill to a local to make e.g., `tmp0`.
+            create_temp_if_necessary(inliner, tuple_field, callsite, caller_body, return_block)
+        });
+
+        closure_ref_arg.chain(tuple_tmp_args).collect()
+    } else {
+        // FIXME(edition_2024): switch back to a normal method call.
+        <_>::into_iter(args)
+            .map(|a| create_temp_if_necessary(inliner, a.node, callsite, caller_body, return_block))
+            .collect()
+    }
+}
 
-        local
+/// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh temporary `T` and an
+/// instruction `T = arg`, and returns `T`.
+fn create_temp_if_necessary<'tcx, I: Inliner<'tcx>>(
+    inliner: &I,
+    arg: Operand<'tcx>,
+    callsite: &CallSite<'tcx>,
+    caller_body: &mut Body<'tcx>,
+    return_block: Option<BasicBlock>,
+) -> Local {
+    // Reuse the operand if it is a moved temporary.
+    if let Operand::Move(place) = &arg
+        && let Some(local) = place.as_local()
+        && caller_body.local_kind(local) == LocalKind::Temp
+    {
+        return local;
+    }
+
+    // Otherwise, create a temporary for the argument.
+    trace!("creating temp for argument {:?}", arg);
+    let arg_ty = arg.ty(caller_body, inliner.tcx());
+    let local = new_call_temp(caller_body, callsite, arg_ty, return_block);
+    caller_body[callsite.block].statements.push(Statement {
+        source_info: callsite.source_info,
+        kind: StatementKind::Assign(Box::new((Place::from(local), Rvalue::Use(arg)))),
+    });
+    local
+}
+
+/// Introduces a new temporary into the caller body that is live for the duration of the call.
+fn new_call_temp<'tcx>(
+    caller_body: &mut Body<'tcx>,
+    callsite: &CallSite<'tcx>,
+    ty: Ty<'tcx>,
+    return_block: Option<BasicBlock>,
+) -> Local {
+    let local = caller_body.local_decls.push(LocalDecl::new(ty, callsite.source_info.span));
+
+    caller_body[callsite.block].statements.push(Statement {
+        source_info: callsite.source_info,
+        kind: StatementKind::StorageLive(local),
+    });
+
+    if let Some(block) = return_block {
+        caller_body[block].statements.insert(0, Statement {
+            source_info: callsite.source_info,
+            kind: StatementKind::StorageDead(local),
+        });
     }
+
+    local
 }
 
 /**