Diffstat (limited to 'compiler/rustc_mir_transform/src')
| -rw-r--r-- | compiler/rustc_mir_transform/src/cross_crate_inline.rs |    7 |
| -rw-r--r-- | compiler/rustc_mir_transform/src/errors.rs             |   28 |
| -rw-r--r-- | compiler/rustc_mir_transform/src/inline.rs             | 1498 |
| -rw-r--r-- | compiler/rustc_mir_transform/src/lib.rs                |    8 |
| -rw-r--r-- | compiler/rustc_mir_transform/src/pass_manager.rs       |   10 |
| -rw-r--r-- | compiler/rustc_mir_transform/src/validate.rs           |   24 |
6 files changed, 941 insertions(+), 634 deletions(-)
diff --git a/compiler/rustc_mir_transform/src/cross_crate_inline.rs b/compiler/rustc_mir_transform/src/cross_crate_inline.rs index e1f1dd83f0d..8fce856687c 100644 --- a/compiler/rustc_mir_transform/src/cross_crate_inline.rs +++ b/compiler/rustc_mir_transform/src/cross_crate_inline.rs @@ -46,7 +46,7 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool { // #[inline(never)] to force code generation. match codegen_fn_attrs.inline { InlineAttr::Never => return false, - InlineAttr::Hint | InlineAttr::Always => return true, + InlineAttr::Hint | InlineAttr::Always | InlineAttr::Force { .. } => return true, _ => {} } @@ -69,8 +69,9 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool { // Don't do any inference if codegen optimizations are disabled and also MIR inlining is not // enabled. This ensures that we do inference even if someone only passes -Zinline-mir, // which is less confusing than having to also enable -Copt-level=1. - if matches!(tcx.sess.opts.optimize, OptLevel::No) && !pm::should_run_pass(tcx, &inline::Inline) - { + let inliner_will_run = pm::should_run_pass(tcx, &inline::Inline) + || inline::ForceInline::should_run_pass_for_callee(tcx, def_id.to_def_id()); + if matches!(tcx.sess.opts.optimize, OptLevel::No) && !inliner_will_run { return false; } diff --git a/compiler/rustc_mir_transform/src/errors.rs b/compiler/rustc_mir_transform/src/errors.rs index 2d9eeddea2e..015633d145f 100644 --- a/compiler/rustc_mir_transform/src/errors.rs +++ b/compiler/rustc_mir_transform/src/errors.rs @@ -4,8 +4,8 @@ use rustc_macros::{Diagnostic, LintDiagnostic, Subdiagnostic}; use rustc_middle::mir::AssertKind; use rustc_middle::ty::TyCtxt; use rustc_session::lint::{self, Lint}; -use rustc_span::Span; use rustc_span::def_id::DefId; +use rustc_span::{Span, Symbol}; use crate::fluent_generated as fluent; @@ -142,3 +142,29 @@ pub(crate) struct MustNotSuspendReason { #[note(mir_transform_note2)] #[help] pub(crate) struct UndefinedTransmute; + +#[derive(Diagnostic)] +#[diag(mir_transform_force_inline)] +#[note] +pub(crate) struct ForceInlineFailure { + #[label(mir_transform_caller)] + pub caller_span: Span, + #[label(mir_transform_callee)] + pub callee_span: Span, + #[label(mir_transform_attr)] + pub attr_span: Span, + #[primary_span] + #[label(mir_transform_call)] + pub call_span: Span, + pub callee: String, + pub caller: String, + pub reason: &'static str, + #[subdiagnostic] + pub justification: Option<ForceInlineJustification>, +} + +#[derive(Subdiagnostic)] +#[note(mir_transform_force_inline_justification)] +pub(crate) struct ForceInlineJustification { + pub sym: Symbol, +} diff --git a/compiler/rustc_mir_transform/src/inline.rs b/compiler/rustc_mir_transform/src/inline.rs index 339acbad6b9..a5a4b5987f5 100644 --- a/compiler/rustc_mir_transform/src/inline.rs +++ b/compiler/rustc_mir_transform/src/inline.rs @@ -6,7 +6,7 @@ use std::ops::{Range, RangeFrom}; use rustc_abi::{ExternAbi, FieldIdx}; use rustc_attr_parsing::InlineAttr; use rustc_hir::def::DefKind; -use rustc_hir::def_id::DefId; +use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::Idx; use rustc_index::bit_set::BitSet; use rustc_middle::bug; @@ -29,10 +29,6 @@ pub(crate) mod cycle; const TOP_DOWN_DEPTH_LIMIT: usize = 5; -// Made public so that `mir_drops_elaborated_and_const_checked` can be overridden -// by custom rustc drivers, running all the steps by themselves. See #114628. 
-pub struct Inline; - #[derive(Clone, Debug)] struct CallSite<'tcx> { callee: Instance<'tcx>, @@ -41,14 +37,12 @@ struct CallSite<'tcx> { source_info: SourceInfo, } +// Made public so that `mir_drops_elaborated_and_const_checked` can be overridden +// by custom rustc drivers, running all the steps by themselves. See #114628. +pub struct Inline; + impl<'tcx> crate::MirPass<'tcx> for Inline { fn is_enabled(&self, sess: &rustc_session::Session) -> bool { - // FIXME(#127234): Coverage instrumentation currently doesn't handle inlined - // MIR correctly when Modified Condition/Decision Coverage is enabled. - if sess.instrument_coverage_mcdc() { - return false; - } - if let Some(enabled) = sess.opts.unstable_opts.inline_mir { return enabled; } @@ -67,7 +61,7 @@ impl<'tcx> crate::MirPass<'tcx> for Inline { fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { let span = trace_span!("inline", body = %tcx.def_path_str(body.source.def_id())); let _guard = span.enter(); - if inline(tcx, body) { + if inline::<NormalInliner<'tcx>>(tcx, body) { debug!("running simplify cfg on {:?}", body.source); simplify_cfg(body); deref_finder(tcx, body); @@ -75,47 +69,84 @@ impl<'tcx> crate::MirPass<'tcx> for Inline { } } -fn inline<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool { - let def_id = body.source.def_id().expect_local(); +pub struct ForceInline; - // Only do inlining into fn bodies. - if !tcx.hir().body_owner_kind(def_id).is_fn_or_closure() { - return false; +impl ForceInline { + pub fn should_run_pass_for_callee<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> bool { + matches!(tcx.codegen_fn_attrs(def_id).inline, InlineAttr::Force { .. }) } - if body.source.promoted.is_some() { - return false; +} + +impl<'tcx> crate::MirPass<'tcx> for ForceInline { + fn is_enabled(&self, _: &rustc_session::Session) -> bool { + true } - // Avoid inlining into coroutines, since their `optimized_mir` is used for layout computation, - // which can create a cycle, even when no attempt is made to inline the function in the other - // direction. - if body.coroutine.is_some() { - return false; + + fn can_be_overridden(&self) -> bool { + false } - let typing_env = body.typing_env(tcx); - let codegen_fn_attrs = tcx.codegen_fn_attrs(def_id); + fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { + let span = trace_span!("force_inline", body = %tcx.def_path_str(body.source.def_id())); + let _guard = span.enter(); + if inline::<ForceInliner<'tcx>>(tcx, body) { + debug!("running simplify cfg on {:?}", body.source); + simplify_cfg(body); + deref_finder(tcx, body); + } + } +} - let mut this = Inliner { - tcx, - typing_env, - codegen_fn_attrs, - history: Vec::new(), - changed: false, - caller_is_inline_forwarder: matches!( - codegen_fn_attrs.inline, - InlineAttr::Hint | InlineAttr::Always - ) && body_is_forwarder(body), - }; - let blocks = START_BLOCK..body.basic_blocks.next_index(); - this.process_blocks(body, blocks); - this.changed +trait Inliner<'tcx> { + fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Self; + + fn tcx(&self) -> TyCtxt<'tcx>; + fn typing_env(&self) -> ty::TypingEnv<'tcx>; + fn history(&self) -> &[DefId]; + fn caller_def_id(&self) -> LocalDefId; + + /// Has the caller body been changed? + fn changed(self) -> bool; + + /// Should inlining happen for a given callee? + fn should_inline_for_callee(&self, def_id: DefId) -> bool; + + fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool; + + /// Returns inlining decision that is based on the examination of callee MIR body. 
+ /// Assumes that codegen attributes have been checked for compatibility already. + fn check_callee_mir_body( + &self, + callsite: &CallSite<'tcx>, + callee_body: &Body<'tcx>, + callee_attrs: &CodegenFnAttrs, + cross_crate_inlinable: bool, + ) -> Result<(), &'static str>; + + // How many callsites in a body are we allowed to inline? We need to limit this in order + // to prevent super-linear growth in MIR size. + fn inline_limit_for_block(&self) -> Option<usize>; + + /// Called when inlining succeeds. + fn on_inline_success( + &mut self, + callsite: &CallSite<'tcx>, + caller_body: &mut Body<'tcx>, + new_blocks: std::ops::Range<BasicBlock>, + ); + + /// Called when inlining failed or was not performed. + fn on_inline_failure(&self, callsite: &CallSite<'tcx>, reason: &'static str); + + /// Called when the inline limit for a body is reached. + fn on_inline_limit_reached(&self) -> bool; } -struct Inliner<'tcx> { +struct ForceInliner<'tcx> { tcx: TyCtxt<'tcx>, typing_env: ty::TypingEnv<'tcx>, - /// Caller codegen attributes. - codegen_fn_attrs: &'tcx CodegenFnAttrs, + /// `DefId` of caller. + def_id: LocalDefId, /// Stack of inlined instances. /// We only check the `DefId` and not the args because we want to /// avoid inlining cases of polymorphic recursion. @@ -124,366 +155,190 @@ struct Inliner<'tcx> { history: Vec<DefId>, /// Indicates that the caller body has been modified. changed: bool, - /// Indicates that the caller is #[inline] and just calls another function, - /// and thus we can inline less into it as it'll be inlined itself. - caller_is_inline_forwarder: bool, } -impl<'tcx> Inliner<'tcx> { - fn process_blocks(&mut self, caller_body: &mut Body<'tcx>, blocks: Range<BasicBlock>) { - // How many callsites in this body are we allowed to inline? We need to limit this in order - // to prevent super-linear growth in MIR size - let inline_limit = match self.history.len() { - 0 => usize::MAX, - 1..=TOP_DOWN_DEPTH_LIMIT => 1, - _ => return, - }; - let mut inlined_count = 0; - for bb in blocks { - let bb_data = &caller_body[bb]; - if bb_data.is_cleanup { - continue; - } - - let Some(callsite) = self.resolve_callsite(caller_body, bb, bb_data) else { - continue; - }; - - let span = trace_span!("process_blocks", %callsite.callee, ?bb); - let _guard = span.enter(); - - match self.try_inlining(caller_body, &callsite) { - Err(reason) => { - debug!("not-inlined {} [{}]", callsite.callee, reason); - } - Ok(new_blocks) => { - debug!("inlined {}", callsite.callee); - self.changed = true; - - self.history.push(callsite.callee.def_id()); - self.process_blocks(caller_body, new_blocks); - self.history.pop(); - - inlined_count += 1; - if inlined_count == inline_limit { - debug!("inline count reached"); - return; - } - } - } - } +impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> { + fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Self { + Self { tcx, typing_env: body.typing_env(tcx), def_id, history: Vec::new(), changed: false } } - /// Attempts to inline a callsite into the caller body. When successful returns basic blocks - /// containing the inlined body. Otherwise returns an error describing why inlining didn't take - /// place. 
- fn try_inlining( - &self, - caller_body: &mut Body<'tcx>, - callsite: &CallSite<'tcx>, - ) -> Result<std::ops::Range<BasicBlock>, &'static str> { - self.check_mir_is_available(caller_body, callsite.callee)?; - - let callee_attrs = self.tcx.codegen_fn_attrs(callsite.callee.def_id()); - let cross_crate_inlinable = self.tcx.cross_crate_inlinable(callsite.callee.def_id()); - self.check_codegen_attributes(callsite, callee_attrs, cross_crate_inlinable)?; - - // Intrinsic fallback bodies are automatically made cross-crate inlineable, - // but at this stage we don't know whether codegen knows the intrinsic, - // so just conservatively don't inline it. This also ensures that we do not - // accidentally inline the body of an intrinsic that *must* be overridden. - if self.tcx.has_attr(callsite.callee.def_id(), sym::rustc_intrinsic) { - return Err("Callee is an intrinsic, do not inline fallback bodies"); - } - - let terminator = caller_body[callsite.block].terminator.as_ref().unwrap(); - let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() }; - let destination_ty = destination.ty(&caller_body.local_decls, self.tcx).ty; - for arg in args { - if !arg.node.ty(&caller_body.local_decls, self.tcx).is_sized(self.tcx, self.typing_env) - { - // We do not allow inlining functions with unsized params. Inlining these functions - // could create unsized locals, which are unsound and being phased out. - return Err("Call has unsized argument"); - } - } - - let callee_body = try_instance_mir(self.tcx, callsite.callee.def)?; - self.check_mir_body(callsite, callee_body, callee_attrs, cross_crate_inlinable)?; - - let Ok(callee_body) = callsite.callee.try_instantiate_mir_and_normalize_erasing_regions( - self.tcx, - self.typing_env, - ty::EarlyBinder::bind(callee_body.clone()), - ) else { - return Err("failed to normalize callee body"); - }; - - // Normally, this shouldn't be required, but trait normalization failure can create a - // validation ICE. - if !validate_types(self.tcx, self.typing_env, &callee_body, &caller_body).is_empty() { - return Err("failed to validate callee body"); - } - - // Check call signature compatibility. - // Normally, this shouldn't be required, but trait normalization failure can create a - // validation ICE. - let output_type = callee_body.return_ty(); - if !util::sub_types(self.tcx, self.typing_env, output_type, destination_ty) { - trace!(?output_type, ?destination_ty); - return Err("failed to normalize return type"); - } - if callsite.fn_sig.abi() == ExternAbi::RustCall { - // FIXME: Don't inline user-written `extern "rust-call"` functions, - // since this is generally perf-negative on rustc, and we hope that - // LLVM will inline these functions instead. - if callee_body.spread_arg.is_some() { - return Err("do not inline user-written rust-call functions"); - } + fn tcx(&self) -> TyCtxt<'tcx> { + self.tcx + } - let (self_arg, arg_tuple) = match &args[..] 
{ - [arg_tuple] => (None, arg_tuple), - [self_arg, arg_tuple] => (Some(self_arg), arg_tuple), - _ => bug!("Expected `rust-call` to have 1 or 2 args"), - }; + fn typing_env(&self) -> ty::TypingEnv<'tcx> { + self.typing_env + } - let self_arg_ty = - self_arg.map(|self_arg| self_arg.node.ty(&caller_body.local_decls, self.tcx)); + fn history(&self) -> &[DefId] { + &self.history + } - let arg_tuple_ty = arg_tuple.node.ty(&caller_body.local_decls, self.tcx); - let ty::Tuple(arg_tuple_tys) = *arg_tuple_ty.kind() else { - bug!("Closure arguments are not passed as a tuple"); - }; + fn caller_def_id(&self) -> LocalDefId { + self.def_id + } - for (arg_ty, input) in - self_arg_ty.into_iter().chain(arg_tuple_tys).zip(callee_body.args_iter()) - { - let input_type = callee_body.local_decls[input].ty; - if !util::sub_types(self.tcx, self.typing_env, input_type, arg_ty) { - trace!(?arg_ty, ?input_type); - return Err("failed to normalize tuple argument type"); - } - } - } else { - for (arg, input) in args.iter().zip(callee_body.args_iter()) { - let input_type = callee_body.local_decls[input].ty; - let arg_ty = arg.node.ty(&caller_body.local_decls, self.tcx); - if !util::sub_types(self.tcx, self.typing_env, input_type, arg_ty) { - trace!(?arg_ty, ?input_type); - return Err("failed to normalize argument type"); - } - } - } + fn changed(self) -> bool { + self.changed + } - let old_blocks = caller_body.basic_blocks.next_index(); - self.inline_call(caller_body, callsite, callee_body); - let new_blocks = old_blocks..caller_body.basic_blocks.next_index(); + fn should_inline_for_callee(&self, def_id: DefId) -> bool { + ForceInline::should_run_pass_for_callee(self.tcx(), def_id) + } - Ok(new_blocks) + fn check_caller_mir_body(&self, _: &Body<'tcx>) -> bool { + true } - fn check_mir_is_available( + #[instrument(level = "debug", skip(self, callee_body))] + fn check_callee_mir_body( &self, - caller_body: &Body<'tcx>, - callee: Instance<'tcx>, + _: &CallSite<'tcx>, + callee_body: &Body<'tcx>, + _: &CodegenFnAttrs, + _: bool, ) -> Result<(), &'static str> { - let caller_def_id = caller_body.source.def_id(); - let callee_def_id = callee.def_id(); - if callee_def_id == caller_def_id { - return Err("self-recursion"); - } - - match callee.def { - InstanceKind::Item(_) => { - // If there is no MIR available (either because it was not in metadata or - // because it has no MIR because it's an extern function), then the inliner - // won't cause cycles on this. - if !self.tcx.is_mir_available(callee_def_id) { - return Err("item MIR unavailable"); - } - } - // These have no own callable MIR. - InstanceKind::Intrinsic(_) | InstanceKind::Virtual(..) => { - return Err("instance without MIR (intrinsic / virtual)"); - } - - // FIXME(#127030): `ConstParamHasTy` has bad interactions with - // the drop shim builder, which does not evaluate predicates in - // the correct param-env for types being dropped. Stall resolving - // the MIR for this instance until all of its const params are - // substituted. - InstanceKind::DropGlue(_, Some(ty)) if ty.has_type_flags(TypeFlags::HAS_CT_PARAM) => { - return Err("still needs substitution"); - } - - // This cannot result in an immediate cycle since the callee MIR is a shim, which does - // not get any optimizations run on it. Any subsequent inlining may cause cycles, but we - // do not need to catch this here, we can wait until the inliner decides to continue - // inlining a second time. - InstanceKind::VTableShim(_) - | InstanceKind::ReifyShim(..) - | InstanceKind::FnPtrShim(..) 
- | InstanceKind::ClosureOnceShim { .. } - | InstanceKind::ConstructCoroutineInClosureShim { .. } - | InstanceKind::DropGlue(..) - | InstanceKind::CloneShim(..) - | InstanceKind::ThreadLocalShim(..) - | InstanceKind::FnPtrAddrShim(..) - | InstanceKind::AsyncDropGlueCtorShim(..) => return Ok(()), - } + if let Some(_) = callee_body.tainted_by_errors { Err("body has errors") } else { Ok(()) } + } - if self.tcx.is_constructor(callee_def_id) { - trace!("constructors always have MIR"); - // Constructor functions cannot cause a query cycle. - return Ok(()); - } + fn inline_limit_for_block(&self) -> Option<usize> { + Some(usize::MAX) + } - if callee_def_id.is_local() { - // If we know for sure that the function we're calling will itself try to - // call us, then we avoid inlining that function. - if self.tcx.mir_callgraph_reachable((callee, caller_def_id.expect_local())) { - return Err("caller might be reachable from callee (query cycle avoidance)"); - } + fn on_inline_success( + &mut self, + callsite: &CallSite<'tcx>, + caller_body: &mut Body<'tcx>, + new_blocks: std::ops::Range<BasicBlock>, + ) { + self.changed = true; - Ok(()) - } else { - // This cannot result in an immediate cycle since the callee MIR is from another crate - // and is already optimized. Any subsequent inlining may cause cycles, but we do - // not need to catch this here, we can wait until the inliner decides to continue - // inlining a second time. - trace!("functions from other crates always have MIR"); - Ok(()) - } + self.history.push(callsite.callee.def_id()); + process_blocks(self, caller_body, new_blocks); + self.history.pop(); } - fn resolve_callsite( - &self, - caller_body: &Body<'tcx>, - bb: BasicBlock, - bb_data: &BasicBlockData<'tcx>, - ) -> Option<CallSite<'tcx>> { - // Only consider direct calls to functions - let terminator = bb_data.terminator(); - - // FIXME(explicit_tail_calls): figure out if we can inline tail calls - if let TerminatorKind::Call { ref func, fn_span, .. } = terminator.kind { - let func_ty = func.ty(caller_body, self.tcx); - if let ty::FnDef(def_id, args) = *func_ty.kind() { - // To resolve an instance its args have to be fully normalized. - let args = self.tcx.try_normalize_erasing_regions(self.typing_env, args).ok()?; - let callee = Instance::try_resolve(self.tcx, self.typing_env, def_id, args) - .ok() - .flatten()?; - - if let InstanceKind::Virtual(..) | InstanceKind::Intrinsic(_) = callee.def { - return None; - } - - if self.history.contains(&callee.def_id()) { - return None; - } + fn on_inline_failure(&self, callsite: &CallSite<'tcx>, reason: &'static str) { + let tcx = self.tcx(); + let InlineAttr::Force { attr_span, reason: justification } = + tcx.codegen_fn_attrs(callsite.callee.def_id()).inline + else { + bug!("called on item without required inlining"); + }; - let fn_sig = self.tcx.fn_sig(def_id).instantiate(self.tcx, args); + let call_span = callsite.source_info.span; + tcx.dcx().emit_err(crate::errors::ForceInlineFailure { + call_span, + attr_span, + caller_span: tcx.def_span(self.def_id), + caller: tcx.def_path_str(self.def_id), + callee_span: tcx.def_span(callsite.callee.def_id()), + callee: tcx.def_path_str(callsite.callee.def_id()), + reason, + justification: justification.map(|sym| crate::errors::ForceInlineJustification { sym }), + }); + } - // Additionally, check that the body that we're inlining actually agrees - // with the ABI of the trait that the item comes from. 
- if let InstanceKind::Item(instance_def_id) = callee.def - && self.tcx.def_kind(instance_def_id) == DefKind::AssocFn - && let instance_fn_sig = self.tcx.fn_sig(instance_def_id).skip_binder() - && instance_fn_sig.abi() != fn_sig.abi() - { - return None; - } + fn on_inline_limit_reached(&self) -> bool { + false + } +} - let source_info = SourceInfo { span: fn_span, ..terminator.source_info }; +struct NormalInliner<'tcx> { + tcx: TyCtxt<'tcx>, + typing_env: ty::TypingEnv<'tcx>, + /// `DefId` of caller. + def_id: LocalDefId, + /// Stack of inlined instances. + /// We only check the `DefId` and not the args because we want to + /// avoid inlining cases of polymorphic recursion. + /// The number of `DefId`s is finite, so checking history is enough + /// to ensure that we do not loop endlessly while inlining. + history: Vec<DefId>, + /// Indicates that the caller body has been modified. + changed: bool, + /// Indicates that the caller is #[inline] and just calls another function, + /// and thus we can inline less into it as it'll be inlined itself. + caller_is_inline_forwarder: bool, +} - return Some(CallSite { callee, fn_sig, block: bb, source_info }); - } +impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> { + fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Self { + let typing_env = body.typing_env(tcx); + let codegen_fn_attrs = tcx.codegen_fn_attrs(def_id); + + Self { + tcx, + typing_env, + def_id, + history: Vec::new(), + changed: false, + caller_is_inline_forwarder: matches!( + codegen_fn_attrs.inline, + InlineAttr::Hint | InlineAttr::Always | InlineAttr::Force { .. } + ) && body_is_forwarder(body), } - - None } - /// Returns an error if inlining is not possible based on codegen attributes alone. A success - /// indicates that inlining decision should be based on other criteria. - fn check_codegen_attributes( - &self, - callsite: &CallSite<'tcx>, - callee_attrs: &CodegenFnAttrs, - cross_crate_inlinable: bool, - ) -> Result<(), &'static str> { - if self.tcx.has_attr(callsite.callee.def_id(), sym::rustc_no_mir_inline) { - return Err("#[rustc_no_mir_inline]"); - } + fn tcx(&self) -> TyCtxt<'tcx> { + self.tcx + } - if let InlineAttr::Never = callee_attrs.inline { - return Err("never inline hint"); - } + fn caller_def_id(&self) -> LocalDefId { + self.def_id + } - // Reachability pass defines which functions are eligible for inlining. Generally inlining - // other functions is incorrect because they could reference symbols that aren't exported. - let is_generic = callsite.callee.args.non_erasable_generics().next().is_some(); - if !is_generic && !cross_crate_inlinable { - return Err("not exported"); - } + fn typing_env(&self) -> ty::TypingEnv<'tcx> { + self.typing_env + } - if callsite.fn_sig.c_variadic() { - return Err("C variadic"); - } + fn history(&self) -> &[DefId] { + &self.history + } - if callee_attrs.flags.contains(CodegenFnAttrFlags::COLD) { - return Err("cold"); - } + fn changed(self) -> bool { + self.changed + } - if callee_attrs.no_sanitize != self.codegen_fn_attrs.no_sanitize { - return Err("incompatible sanitizer set"); - } + fn should_inline_for_callee(&self, _: DefId) -> bool { + true + } - // Two functions are compatible if the callee has no attribute (meaning - // that it's codegen agnostic), or sets an attribute that is identical - // to this function's attribute. 
- if callee_attrs.instruction_set.is_some() - && callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set - { - return Err("incompatible instruction set"); + fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool { + if body.source.promoted.is_some() { + return false; } - let callee_feature_names = callee_attrs.target_features.iter().map(|f| f.name); - let this_feature_names = self.codegen_fn_attrs.target_features.iter().map(|f| f.name); - if callee_feature_names.ne(this_feature_names) { - // In general it is not correct to inline a callee with target features that are a - // subset of the caller. This is because the callee might contain calls, and the ABI of - // those calls depends on the target features of the surrounding function. By moving a - // `Call` terminator from one MIR body to another with more target features, we might - // change the ABI of that call! - return Err("incompatible target features"); + // Avoid inlining into coroutines, since their `optimized_mir` is used for layout computation, + // which can create a cycle, even when no attempt is made to inline the function in the other + // direction. + if body.coroutine.is_some() { + return false; } - Ok(()) + true } - /// Returns inlining decision that is based on the examination of callee MIR body. - /// Assumes that codegen attributes have been checked for compatibility already. #[instrument(level = "debug", skip(self, callee_body))] - fn check_mir_body( + fn check_callee_mir_body( &self, callsite: &CallSite<'tcx>, callee_body: &Body<'tcx>, callee_attrs: &CodegenFnAttrs, cross_crate_inlinable: bool, ) -> Result<(), &'static str> { - let tcx = self.tcx; + let tcx = self.tcx(); if let Some(_) = callee_body.tainted_by_errors { - return Err("Body is tainted"); + return Err("body has errors"); } let mut threshold = if self.caller_is_inline_forwarder { - self.tcx.sess.opts.unstable_opts.inline_mir_forwarder_threshold.unwrap_or(30) + tcx.sess.opts.unstable_opts.inline_mir_forwarder_threshold.unwrap_or(30) } else if cross_crate_inlinable { - self.tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100) + tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100) } else { - self.tcx.sess.opts.unstable_opts.inline_mir_threshold.unwrap_or(50) + tcx.sess.opts.unstable_opts.inline_mir_threshold.unwrap_or(50) }; // Give a bonus functions with a small number of blocks, @@ -497,7 +352,7 @@ impl<'tcx> Inliner<'tcx> { // FIXME: Give a bonus to functions with only a single caller let mut checker = - CostChecker::new(self.tcx, self.typing_env, Some(callsite.callee), callee_body); + CostChecker::new(tcx, self.typing_env(), Some(callsite.callee), callee_body); checker.add_function_level_costs(); @@ -513,20 +368,20 @@ impl<'tcx> Inliner<'tcx> { checker.visit_basic_block_data(bb, blk); let term = blk.terminator(); + let caller_attrs = tcx.codegen_fn_attrs(self.caller_def_id()); if let TerminatorKind::Drop { ref place, target, unwind, replace: _ } = term.kind { work_list.push(target); // If the place doesn't actually need dropping, treat it like a regular goto. 
- let ty = callsite.callee.instantiate_mir( - self.tcx, - ty::EarlyBinder::bind(&place.ty(callee_body, tcx).ty), - ); - if ty.needs_drop(tcx, self.typing_env) + let ty = callsite + .callee + .instantiate_mir(tcx, ty::EarlyBinder::bind(&place.ty(callee_body, tcx).ty)); + if ty.needs_drop(tcx, self.typing_env()) && let UnwindAction::Cleanup(unwind) = unwind { work_list.push(unwind); } - } else if callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set + } else if callee_attrs.instruction_set != caller_attrs.instruction_set && matches!(term.kind, TerminatorKind::InlineAsm { .. }) { // During the attribute checking stage we allow a callee with no @@ -534,7 +389,7 @@ impl<'tcx> Inliner<'tcx> { // assign one. However, during this stage we require an exact match when any // inline-asm is detected. LLVM will still possibly do an inline later on // if the no-attribute function ends up with the same instruction set anyway. - return Err("Cannot move inline-asm across instruction sets"); + return Err("cannot move inline-asm across instruction sets"); } else if let TerminatorKind::TailCall { .. } = term.kind { // FIXME(explicit_tail_calls): figure out how exactly functions containing tail // calls can be inlined (and if they even should) @@ -558,321 +413,710 @@ impl<'tcx> Inliner<'tcx> { } } - fn inline_call( - &self, - caller_body: &mut Body<'tcx>, + fn inline_limit_for_block(&self) -> Option<usize> { + match self.history.len() { + 0 => Some(usize::MAX), + 1..=TOP_DOWN_DEPTH_LIMIT => Some(1), + _ => None, + } + } + + fn on_inline_success( + &mut self, callsite: &CallSite<'tcx>, - mut callee_body: Body<'tcx>, + caller_body: &mut Body<'tcx>, + new_blocks: std::ops::Range<BasicBlock>, ) { - let terminator = caller_body[callsite.block].terminator.take().unwrap(); - let TerminatorKind::Call { func, args, destination, unwind, target, .. } = terminator.kind - else { - bug!("unexpected terminator kind {:?}", terminator.kind); - }; + self.changed = true; - let return_block = if let Some(block) = target { - // Prepare a new block for code that should execute when call returns. We don't use - // target block directly since it might have other predecessors. - let data = BasicBlockData::new( - Some(Terminator { - source_info: terminator.source_info, - kind: TerminatorKind::Goto { target: block }, - }), - caller_body[block].is_cleanup, - ); - Some(caller_body.basic_blocks_mut().push(data)) - } else { - None + self.history.push(callsite.callee.def_id()); + process_blocks(self, caller_body, new_blocks); + self.history.pop(); + } + + fn on_inline_limit_reached(&self) -> bool { + true + } + + fn on_inline_failure(&self, _: &CallSite<'tcx>, _: &'static str) {} +} + +fn inline<'tcx, T: Inliner<'tcx>>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool { + let def_id = body.source.def_id().expect_local(); + + // Only do inlining into fn bodies. 
+ if !tcx.hir().body_owner_kind(def_id).is_fn_or_closure() { + return false; + } + + let mut inliner = T::new(tcx, def_id, body); + if !inliner.check_caller_mir_body(body) { + return false; + } + + let blocks = START_BLOCK..body.basic_blocks.next_index(); + process_blocks(&mut inliner, body, blocks); + inliner.changed() +} + +fn process_blocks<'tcx, I: Inliner<'tcx>>( + inliner: &mut I, + caller_body: &mut Body<'tcx>, + blocks: Range<BasicBlock>, +) { + let Some(inline_limit) = inliner.inline_limit_for_block() else { return }; + let mut inlined_count = 0; + for bb in blocks { + let bb_data = &caller_body[bb]; + if bb_data.is_cleanup { + continue; + } + + let Some(callsite) = resolve_callsite(inliner, caller_body, bb, bb_data) else { + continue; }; - // If the call is something like `a[*i] = f(i)`, where - // `i : &mut usize`, then just duplicating the `a[*i]` - // Place could result in two different locations if `f` - // writes to `i`. To prevent this we need to create a temporary - // borrow of the place and pass the destination as `*temp` instead. - fn dest_needs_borrow(place: Place<'_>) -> bool { - for elem in place.projection.iter() { - match elem { - ProjectionElem::Deref | ProjectionElem::Index(_) => return true, - _ => {} + let span = trace_span!("process_blocks", %callsite.callee, ?bb); + let _guard = span.enter(); + + if !inliner.should_inline_for_callee(callsite.callee.def_id()) { + debug!("not enabled"); + continue; + } + + match try_inlining(inliner, caller_body, &callsite) { + Err(reason) => { + debug!("not-inlined {} [{}]", callsite.callee, reason); + inliner.on_inline_failure(&callsite, reason); + } + Ok(new_blocks) => { + debug!("inlined {}", callsite.callee); + inliner.on_inline_success(&callsite, caller_body, new_blocks); + + inlined_count += 1; + if inlined_count == inline_limit { + if inliner.on_inline_limit_reached() { + return; + } } } + } + } +} + +fn resolve_callsite<'tcx, I: Inliner<'tcx>>( + inliner: &I, + caller_body: &Body<'tcx>, + bb: BasicBlock, + bb_data: &BasicBlockData<'tcx>, +) -> Option<CallSite<'tcx>> { + let tcx = inliner.tcx(); + // Only consider direct calls to functions + let terminator = bb_data.terminator(); + + // FIXME(explicit_tail_calls): figure out if we can inline tail calls + if let TerminatorKind::Call { ref func, fn_span, .. } = terminator.kind { + let func_ty = func.ty(caller_body, tcx); + if let ty::FnDef(def_id, args) = *func_ty.kind() { + // To resolve an instance its args have to be fully normalized. + let args = tcx.try_normalize_erasing_regions(inliner.typing_env(), args).ok()?; + let callee = + Instance::try_resolve(tcx, inliner.typing_env(), def_id, args).ok().flatten()?; + + if let InstanceKind::Virtual(..) | InstanceKind::Intrinsic(_) = callee.def { + return None; + } - false + if inliner.history().contains(&callee.def_id()) { + return None; + } + + let fn_sig = tcx.fn_sig(def_id).instantiate(tcx, args); + + // Additionally, check that the body that we're inlining actually agrees + // with the ABI of the trait that the item comes from. 
+ if let InstanceKind::Item(instance_def_id) = callee.def + && tcx.def_kind(instance_def_id) == DefKind::AssocFn + && let instance_fn_sig = tcx.fn_sig(instance_def_id).skip_binder() + && instance_fn_sig.abi() != fn_sig.abi() + { + return None; + } + + let source_info = SourceInfo { span: fn_span, ..terminator.source_info }; + + return Some(CallSite { callee, fn_sig, block: bb, source_info }); } + } - let dest = if dest_needs_borrow(destination) { - trace!("creating temp for return destination"); - let dest = Rvalue::Ref( - self.tcx.lifetimes.re_erased, - BorrowKind::Mut { kind: MutBorrowKind::Default }, - destination, - ); - let dest_ty = dest.ty(caller_body, self.tcx); - let temp = - Place::from(self.new_call_temp(caller_body, callsite, dest_ty, return_block)); - caller_body[callsite.block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::Assign(Box::new((temp, dest))), - }); - self.tcx.mk_place_deref(temp) - } else { - destination - }; + None +} - // Always create a local to hold the destination, as `RETURN_PLACE` may appear - // where a full `Place` is not allowed. - let (remap_destination, destination_local) = if let Some(d) = dest.as_local() { - (false, d) - } else { - ( - true, - self.new_call_temp( - caller_body, - callsite, - destination.ty(caller_body, self.tcx).ty, - return_block, - ), - ) - }; +/// Attempts to inline a callsite into the caller body. When successful returns basic blocks +/// containing the inlined body. Otherwise returns an error describing why inlining didn't take +/// place. +fn try_inlining<'tcx, I: Inliner<'tcx>>( + inliner: &I, + caller_body: &mut Body<'tcx>, + callsite: &CallSite<'tcx>, +) -> Result<std::ops::Range<BasicBlock>, &'static str> { + let tcx = inliner.tcx(); + check_mir_is_available(inliner, caller_body, callsite.callee)?; + + let callee_attrs = tcx.codegen_fn_attrs(callsite.callee.def_id()); + let cross_crate_inlinable = tcx.cross_crate_inlinable(callsite.callee.def_id()); + check_codegen_attributes(inliner, callsite, callee_attrs, cross_crate_inlinable)?; + + // Intrinsic fallback bodies are automatically made cross-crate inlineable, + // but at this stage we don't know whether codegen knows the intrinsic, + // so just conservatively don't inline it. This also ensures that we do not + // accidentally inline the body of an intrinsic that *must* be overridden. + if tcx.has_attr(callsite.callee.def_id(), sym::rustc_intrinsic) { + return Err("callee is an intrinsic"); + } - // Copy the arguments if needed. - let args = self.make_call_args(args, callsite, caller_body, &callee_body, return_block); + let terminator = caller_body[callsite.block].terminator.as_ref().unwrap(); + let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() }; + let destination_ty = destination.ty(&caller_body.local_decls, tcx).ty; + for arg in args { + if !arg.node.ty(&caller_body.local_decls, tcx).is_sized(tcx, inliner.typing_env()) { + // We do not allow inlining functions with unsized params. Inlining these functions + // could create unsized locals, which are unsound and being phased out. 
+ return Err("call has unsized argument"); + } + } - let mut integrator = Integrator { - args: &args, - new_locals: Local::new(caller_body.local_decls.len()).., - new_scopes: SourceScope::new(caller_body.source_scopes.len()).., - new_blocks: BasicBlock::new(caller_body.basic_blocks.len()).., - destination: destination_local, - callsite_scope: caller_body.source_scopes[callsite.source_info.scope].clone(), - callsite, - cleanup_block: unwind, - in_cleanup_block: false, - return_block, - tcx: self.tcx, - always_live_locals: BitSet::new_filled(callee_body.local_decls.len()), + let callee_body = try_instance_mir(tcx, callsite.callee.def)?; + inliner.check_callee_mir_body(callsite, callee_body, callee_attrs, cross_crate_inlinable)?; + + let Ok(callee_body) = callsite.callee.try_instantiate_mir_and_normalize_erasing_regions( + tcx, + inliner.typing_env(), + ty::EarlyBinder::bind(callee_body.clone()), + ) else { + debug!("failed to normalize callee body"); + return Err("implementation limitation"); + }; + + // Normally, this shouldn't be required, but trait normalization failure can create a + // validation ICE. + if !validate_types(tcx, inliner.typing_env(), &callee_body, &caller_body).is_empty() { + debug!("failed to validate callee body"); + return Err("implementation limitation"); + } + + // Check call signature compatibility. + // Normally, this shouldn't be required, but trait normalization failure can create a + // validation ICE. + let output_type = callee_body.return_ty(); + if !util::sub_types(tcx, inliner.typing_env(), output_type, destination_ty) { + trace!(?output_type, ?destination_ty); + debug!("failed to normalize return type"); + return Err("implementation limitation"); + } + if callsite.fn_sig.abi() == ExternAbi::RustCall { + // FIXME: Don't inline user-written `extern "rust-call"` functions, + // since this is generally perf-negative on rustc, and we hope that + // LLVM will inline these functions instead. + if callee_body.spread_arg.is_some() { + return Err("user-written rust-call functions"); + } + + let (self_arg, arg_tuple) = match &args[..] { + [arg_tuple] => (None, arg_tuple), + [self_arg, arg_tuple] => (Some(self_arg), arg_tuple), + _ => bug!("Expected `rust-call` to have 1 or 2 args"), }; - // Map all `Local`s, `SourceScope`s and `BasicBlock`s to new ones - // (or existing ones, in a few special cases) in the caller. - integrator.visit_body(&mut callee_body); + let self_arg_ty = self_arg.map(|self_arg| self_arg.node.ty(&caller_body.local_decls, tcx)); - // If there are any locals without storage markers, give them storage only for the - // duration of the call. 
- for local in callee_body.vars_and_temps_iter() { - if integrator.always_live_locals.contains(local) { - let new_local = integrator.map_local(local); - caller_body[callsite.block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::StorageLive(new_local), - }); + let arg_tuple_ty = arg_tuple.node.ty(&caller_body.local_decls, tcx); + let ty::Tuple(arg_tuple_tys) = *arg_tuple_ty.kind() else { + bug!("Closure arguments are not passed as a tuple"); + }; + + for (arg_ty, input) in + self_arg_ty.into_iter().chain(arg_tuple_tys).zip(callee_body.args_iter()) + { + let input_type = callee_body.local_decls[input].ty; + if !util::sub_types(tcx, inliner.typing_env(), input_type, arg_ty) { + trace!(?arg_ty, ?input_type); + debug!("failed to normalize tuple argument type"); + return Err("implementation limitation"); } } - if let Some(block) = return_block { - // To avoid repeated O(n) insert, push any new statements to the end and rotate - // the slice once. - let mut n = 0; - if remap_destination { - caller_body[block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::Assign(Box::new(( - dest, - Rvalue::Use(Operand::Move(destination_local.into())), - ))), - }); - n += 1; + } else { + for (arg, input) in args.iter().zip(callee_body.args_iter()) { + let input_type = callee_body.local_decls[input].ty; + let arg_ty = arg.node.ty(&caller_body.local_decls, tcx); + if !util::sub_types(tcx, inliner.typing_env(), input_type, arg_ty) { + trace!(?arg_ty, ?input_type); + debug!("failed to normalize argument type"); + return Err("implementation limitation"); } - for local in callee_body.vars_and_temps_iter().rev() { - if integrator.always_live_locals.contains(local) { - let new_local = integrator.map_local(local); - caller_body[block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::StorageDead(new_local), - }); - n += 1; - } + } + } + + let old_blocks = caller_body.basic_blocks.next_index(); + inline_call(inliner, caller_body, callsite, callee_body); + let new_blocks = old_blocks..caller_body.basic_blocks.next_index(); + + Ok(new_blocks) +} + +fn check_mir_is_available<'tcx, I: Inliner<'tcx>>( + inliner: &I, + caller_body: &Body<'tcx>, + callee: Instance<'tcx>, +) -> Result<(), &'static str> { + let caller_def_id = caller_body.source.def_id(); + let callee_def_id = callee.def_id(); + if callee_def_id == caller_def_id { + return Err("self-recursion"); + } + + match callee.def { + InstanceKind::Item(_) => { + // If there is no MIR available (either because it was not in metadata or + // because it has no MIR because it's an extern function), then the inliner + // won't cause cycles on this. + if !inliner.tcx().is_mir_available(callee_def_id) { + debug!("item MIR unavailable"); + return Err("implementation limitation"); } - caller_body[block].statements.rotate_right(n); + } + // These have no own callable MIR. + InstanceKind::Intrinsic(_) | InstanceKind::Virtual(..) => { + debug!("instance without MIR (intrinsic / virtual)"); + return Err("implementation limitation"); } - // Insert all of the (mapped) parts of the callee body into the caller. 
- caller_body.local_decls.extend(callee_body.drain_vars_and_temps()); - caller_body.source_scopes.append(&mut callee_body.source_scopes); - if self - .tcx - .sess - .opts - .unstable_opts - .inline_mir_preserve_debug - .unwrap_or(self.tcx.sess.opts.debuginfo != DebugInfo::None) - { - // Note that we need to preserve these in the standard library so that - // people working on rust can build with or without debuginfo while - // still getting consistent results from the mir-opt tests. - caller_body.var_debug_info.append(&mut callee_body.var_debug_info); + // FIXME(#127030): `ConstParamHasTy` has bad interactions with + // the drop shim builder, which does not evaluate predicates in + // the correct param-env for types being dropped. Stall resolving + // the MIR for this instance until all of its const params are + // substituted. + InstanceKind::DropGlue(_, Some(ty)) if ty.has_type_flags(TypeFlags::HAS_CT_PARAM) => { + debug!("still needs substitution"); + return Err("implementation limitation"); } - caller_body.basic_blocks_mut().append(callee_body.basic_blocks_mut()); - caller_body[callsite.block].terminator = Some(Terminator { - source_info: callsite.source_info, - kind: TerminatorKind::Goto { target: integrator.map_block(START_BLOCK) }, - }); + // This cannot result in an immediate cycle since the callee MIR is a shim, which does + // not get any optimizations run on it. Any subsequent inlining may cause cycles, but we + // do not need to catch this here, we can wait until the inliner decides to continue + // inlining a second time. + InstanceKind::VTableShim(_) + | InstanceKind::ReifyShim(..) + | InstanceKind::FnPtrShim(..) + | InstanceKind::ClosureOnceShim { .. } + | InstanceKind::ConstructCoroutineInClosureShim { .. } + | InstanceKind::DropGlue(..) + | InstanceKind::CloneShim(..) + | InstanceKind::ThreadLocalShim(..) + | InstanceKind::FnPtrAddrShim(..) + | InstanceKind::AsyncDropGlueCtorShim(..) => return Ok(()), + } - // Copy required constants from the callee_body into the caller_body. Although we are only - // pushing unevaluated consts to `required_consts`, here they may have been evaluated - // because we are calling `instantiate_and_normalize_erasing_regions` -- so we filter again. - caller_body.required_consts.as_mut().unwrap().extend( - callee_body.required_consts().into_iter().filter(|ct| ct.const_.is_required_const()), - ); - // Now that we incorporated the callee's `required_consts`, we can remove the callee from - // `mentioned_items` -- but we have to take their `mentioned_items` in return. This does - // some extra work here to save the monomorphization collector work later. It helps a lot, - // since monomorphization can avoid a lot of work when the "mentioned items" are similar to - // the actually used items. By doing this we can entirely avoid visiting the callee! - // We need to reconstruct the `required_item` for the callee so that we can find and - // remove it. - let callee_item = MentionedItem::Fn(func.ty(caller_body, self.tcx)); - let caller_mentioned_items = caller_body.mentioned_items.as_mut().unwrap(); - if let Some(idx) = caller_mentioned_items.iter().position(|item| item.node == callee_item) { - // We found the callee, so remove it and add its items instead. - caller_mentioned_items.remove(idx); - caller_mentioned_items.extend(callee_body.mentioned_items()); - } else { - // If we can't find the callee, there's no point in adding its items. 
Probably it - // already got removed by being inlined elsewhere in the same function, so we already - // took its items. + if inliner.tcx().is_constructor(callee_def_id) { + trace!("constructors always have MIR"); + // Constructor functions cannot cause a query cycle. + return Ok(()); + } + + if callee_def_id.is_local() { + // If we know for sure that the function we're calling will itself try to + // call us, then we avoid inlining that function. + if inliner.tcx().mir_callgraph_reachable((callee, caller_def_id.expect_local())) { + debug!("query cycle avoidance"); + return Err("caller might be reachable from callee"); } + + Ok(()) + } else { + // This cannot result in an immediate cycle since the callee MIR is from another crate + // and is already optimized. Any subsequent inlining may cause cycles, but we do + // not need to catch this here, we can wait until the inliner decides to continue + // inlining a second time. + trace!("functions from other crates always have MIR"); + Ok(()) } +} - fn make_call_args( - &self, - args: Box<[Spanned<Operand<'tcx>>]>, - callsite: &CallSite<'tcx>, - caller_body: &mut Body<'tcx>, - callee_body: &Body<'tcx>, - return_block: Option<BasicBlock>, - ) -> Box<[Local]> { - let tcx = self.tcx; - - // There is a bit of a mismatch between the *caller* of a closure and the *callee*. - // The caller provides the arguments wrapped up in a tuple: - // - // tuple_tmp = (a, b, c) - // Fn::call(closure_ref, tuple_tmp) - // - // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`) - // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has - // the job of unpacking this tuple. But here, we are codegen. =) So we want to create - // a vector like - // - // [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2] - // - // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient - // if we "spill" that into *another* temporary, so that we can map the argument - // variable in the callee MIR directly to an argument variable on our side. - // So we introduce temporaries like: - // - // tmp0 = tuple_tmp.0 - // tmp1 = tuple_tmp.1 - // tmp2 = tuple_tmp.2 - // - // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`. - if callsite.fn_sig.abi() == ExternAbi::RustCall && callee_body.spread_arg.is_none() { - // FIXME(edition_2024): switch back to a normal method call. - let mut args = <_>::into_iter(args); - let self_ = self.create_temp_if_necessary( - args.next().unwrap().node, - callsite, - caller_body, - return_block, - ); - let tuple = self.create_temp_if_necessary( - args.next().unwrap().node, - callsite, - caller_body, - return_block, - ); - assert!(args.next().is_none()); - - let tuple = Place::from(tuple); - let ty::Tuple(tuple_tys) = tuple.ty(caller_body, tcx).ty.kind() else { - bug!("Closure arguments are not passed as a tuple"); - }; +/// Returns an error if inlining is not possible based on codegen attributes alone. A success +/// indicates that inlining decision should be based on other criteria. +fn check_codegen_attributes<'tcx, I: Inliner<'tcx>>( + inliner: &I, + callsite: &CallSite<'tcx>, + callee_attrs: &CodegenFnAttrs, + cross_crate_inlinable: bool, +) -> Result<(), &'static str> { + let tcx = inliner.tcx(); + if tcx.has_attr(callsite.callee.def_id(), sym::rustc_no_mir_inline) { + return Err("#[rustc_no_mir_inline]"); + } - // The `closure_ref` in our example above. 
- let closure_ref_arg = iter::once(self_); + if let InlineAttr::Never = callee_attrs.inline { + return Err("never inline attribute"); + } - // The `tmp0`, `tmp1`, and `tmp2` in our example above. - let tuple_tmp_args = tuple_tys.iter().enumerate().map(|(i, ty)| { - // This is e.g., `tuple_tmp.0` in our example above. - let tuple_field = Operand::Move(tcx.mk_place_field(tuple, FieldIdx::new(i), ty)); + // FIXME(#127234): Coverage instrumentation currently doesn't handle inlined + // MIR correctly when Modified Condition/Decision Coverage is enabled. + if tcx.sess.instrument_coverage_mcdc() { + return Err("incompatible with MC/DC coverage"); + } - // Spill to a local to make e.g., `tmp0`. - self.create_temp_if_necessary(tuple_field, callsite, caller_body, return_block) - }); + // Reachability pass defines which functions are eligible for inlining. Generally inlining + // other functions is incorrect because they could reference symbols that aren't exported. + let is_generic = callsite.callee.args.non_erasable_generics().next().is_some(); + if !is_generic && !cross_crate_inlinable { + return Err("not exported"); + } - closure_ref_arg.chain(tuple_tmp_args).collect() - } else { - // FIXME(edition_2024): switch back to a normal method call. - <_>::into_iter(args) - .map(|a| self.create_temp_if_necessary(a.node, callsite, caller_body, return_block)) - .collect() - } + if callsite.fn_sig.c_variadic() { + return Err("C variadic"); } - /// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh - /// temporary `T` and an instruction `T = arg`, and returns `T`. - fn create_temp_if_necessary( - &self, - arg: Operand<'tcx>, - callsite: &CallSite<'tcx>, - caller_body: &mut Body<'tcx>, - return_block: Option<BasicBlock>, - ) -> Local { - // Reuse the operand if it is a moved temporary. - if let Operand::Move(place) = &arg - && let Some(local) = place.as_local() - && caller_body.local_kind(local) == LocalKind::Temp - { - return local; - } + if callee_attrs.flags.contains(CodegenFnAttrFlags::COLD) { + return Err("cold"); + } - // Otherwise, create a temporary for the argument. - trace!("creating temp for argument {:?}", arg); - let arg_ty = arg.ty(caller_body, self.tcx); - let local = self.new_call_temp(caller_body, callsite, arg_ty, return_block); - caller_body[callsite.block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::Assign(Box::new((Place::from(local), Rvalue::Use(arg)))), - }); - local + let codegen_fn_attrs = tcx.codegen_fn_attrs(inliner.caller_def_id()); + if callee_attrs.no_sanitize != codegen_fn_attrs.no_sanitize { + return Err("incompatible sanitizer set"); } - /// Introduces a new temporary into the caller body that is live for the duration of the call. - fn new_call_temp( - &self, - caller_body: &mut Body<'tcx>, - callsite: &CallSite<'tcx>, - ty: Ty<'tcx>, - return_block: Option<BasicBlock>, - ) -> Local { - let local = caller_body.local_decls.push(LocalDecl::new(ty, callsite.source_info.span)); + // Two functions are compatible if the callee has no attribute (meaning + // that it's codegen agnostic), or sets an attribute that is identical + // to this function's attribute. 
+ if callee_attrs.instruction_set.is_some() + && callee_attrs.instruction_set != codegen_fn_attrs.instruction_set + { + return Err("incompatible instruction set"); + } + + let callee_feature_names = callee_attrs.target_features.iter().map(|f| f.name); + let this_feature_names = codegen_fn_attrs.target_features.iter().map(|f| f.name); + if callee_feature_names.ne(this_feature_names) { + // In general it is not correct to inline a callee with target features that are a + // subset of the caller. This is because the callee might contain calls, and the ABI of + // those calls depends on the target features of the surrounding function. By moving a + // `Call` terminator from one MIR body to another with more target features, we might + // change the ABI of that call! + return Err("incompatible target features"); + } + Ok(()) +} + +fn inline_call<'tcx, I: Inliner<'tcx>>( + inliner: &I, + caller_body: &mut Body<'tcx>, + callsite: &CallSite<'tcx>, + mut callee_body: Body<'tcx>, +) { + let tcx = inliner.tcx(); + let terminator = caller_body[callsite.block].terminator.take().unwrap(); + let TerminatorKind::Call { func, args, destination, unwind, target, .. } = terminator.kind + else { + bug!("unexpected terminator kind {:?}", terminator.kind); + }; + + let return_block = if let Some(block) = target { + // Prepare a new block for code that should execute when call returns. We don't use + // target block directly since it might have other predecessors. + let data = BasicBlockData::new( + Some(Terminator { + source_info: terminator.source_info, + kind: TerminatorKind::Goto { target: block }, + }), + caller_body[block].is_cleanup, + ); + Some(caller_body.basic_blocks_mut().push(data)) + } else { + None + }; + + // If the call is something like `a[*i] = f(i)`, where + // `i : &mut usize`, then just duplicating the `a[*i]` + // Place could result in two different locations if `f` + // writes to `i`. To prevent this we need to create a temporary + // borrow of the place and pass the destination as `*temp` instead. + fn dest_needs_borrow(place: Place<'_>) -> bool { + for elem in place.projection.iter() { + match elem { + ProjectionElem::Deref | ProjectionElem::Index(_) => return true, + _ => {} + } + } + + false + } + + let dest = if dest_needs_borrow(destination) { + trace!("creating temp for return destination"); + let dest = Rvalue::Ref( + tcx.lifetimes.re_erased, + BorrowKind::Mut { kind: MutBorrowKind::Default }, + destination, + ); + let dest_ty = dest.ty(caller_body, tcx); + let temp = Place::from(new_call_temp(caller_body, callsite, dest_ty, return_block)); caller_body[callsite.block].statements.push(Statement { source_info: callsite.source_info, - kind: StatementKind::StorageLive(local), + kind: StatementKind::Assign(Box::new((temp, dest))), }); + tcx.mk_place_deref(temp) + } else { + destination + }; + + // Always create a local to hold the destination, as `RETURN_PLACE` may appear + // where a full `Place` is not allowed. + let (remap_destination, destination_local) = if let Some(d) = dest.as_local() { + (false, d) + } else { + ( + true, + new_call_temp(caller_body, callsite, destination.ty(caller_body, tcx).ty, return_block), + ) + }; - if let Some(block) = return_block { - caller_body[block].statements.insert(0, Statement { + // Copy the arguments if needed. 
+ let args = make_call_args(inliner, args, callsite, caller_body, &callee_body, return_block); + + let mut integrator = Integrator { + args: &args, + new_locals: Local::new(caller_body.local_decls.len()).., + new_scopes: SourceScope::new(caller_body.source_scopes.len()).., + new_blocks: BasicBlock::new(caller_body.basic_blocks.len()).., + destination: destination_local, + callsite_scope: caller_body.source_scopes[callsite.source_info.scope].clone(), + callsite, + cleanup_block: unwind, + in_cleanup_block: false, + return_block, + tcx, + always_live_locals: BitSet::new_filled(callee_body.local_decls.len()), + }; + + // Map all `Local`s, `SourceScope`s and `BasicBlock`s to new ones + // (or existing ones, in a few special cases) in the caller. + integrator.visit_body(&mut callee_body); + + // If there are any locals without storage markers, give them storage only for the + // duration of the call. + for local in callee_body.vars_and_temps_iter() { + if integrator.always_live_locals.contains(local) { + let new_local = integrator.map_local(local); + caller_body[callsite.block].statements.push(Statement { source_info: callsite.source_info, - kind: StatementKind::StorageDead(local), + kind: StatementKind::StorageLive(new_local), }); } + } + if let Some(block) = return_block { + // To avoid repeated O(n) insert, push any new statements to the end and rotate + // the slice once. + let mut n = 0; + if remap_destination { + caller_body[block].statements.push(Statement { + source_info: callsite.source_info, + kind: StatementKind::Assign(Box::new(( + dest, + Rvalue::Use(Operand::Move(destination_local.into())), + ))), + }); + n += 1; + } + for local in callee_body.vars_and_temps_iter().rev() { + if integrator.always_live_locals.contains(local) { + let new_local = integrator.map_local(local); + caller_body[block].statements.push(Statement { + source_info: callsite.source_info, + kind: StatementKind::StorageDead(new_local), + }); + n += 1; + } + } + caller_body[block].statements.rotate_right(n); + } + + // Insert all of the (mapped) parts of the callee body into the caller. + caller_body.local_decls.extend(callee_body.drain_vars_and_temps()); + caller_body.source_scopes.append(&mut callee_body.source_scopes); + if tcx + .sess + .opts + .unstable_opts + .inline_mir_preserve_debug + .unwrap_or(tcx.sess.opts.debuginfo != DebugInfo::None) + { + // Note that we need to preserve these in the standard library so that + // people working on rust can build with or without debuginfo while + // still getting consistent results from the mir-opt tests. + caller_body.var_debug_info.append(&mut callee_body.var_debug_info); + } + caller_body.basic_blocks_mut().append(callee_body.basic_blocks_mut()); + + caller_body[callsite.block].terminator = Some(Terminator { + source_info: callsite.source_info, + kind: TerminatorKind::Goto { target: integrator.map_block(START_BLOCK) }, + }); + + // Copy required constants from the callee_body into the caller_body. Although we are only + // pushing unevaluated consts to `required_consts`, here they may have been evaluated + // because we are calling `instantiate_and_normalize_erasing_regions` -- so we filter again. + caller_body.required_consts.as_mut().unwrap().extend( + callee_body.required_consts().into_iter().filter(|ct| ct.const_.is_required_const()), + ); + // Now that we incorporated the callee's `required_consts`, we can remove the callee from + // `mentioned_items` -- but we have to take their `mentioned_items` in return. 
This does + // some extra work here to save the monomorphization collector work later. It helps a lot, + // since monomorphization can avoid a lot of work when the "mentioned items" are similar to + // the actually used items. By doing this we can entirely avoid visiting the callee! + // We need to reconstruct the `required_item` for the callee so that we can find and + // remove it. + let callee_item = MentionedItem::Fn(func.ty(caller_body, tcx)); + let caller_mentioned_items = caller_body.mentioned_items.as_mut().unwrap(); + if let Some(idx) = caller_mentioned_items.iter().position(|item| item.node == callee_item) { + // We found the callee, so remove it and add its items instead. + caller_mentioned_items.remove(idx); + caller_mentioned_items.extend(callee_body.mentioned_items()); + } else { + // If we can't find the callee, there's no point in adding its items. Probably it + // already got removed by being inlined elsewhere in the same function, so we already + // took its items. + } +} + +fn make_call_args<'tcx, I: Inliner<'tcx>>( + inliner: &I, + args: Box<[Spanned<Operand<'tcx>>]>, + callsite: &CallSite<'tcx>, + caller_body: &mut Body<'tcx>, + callee_body: &Body<'tcx>, + return_block: Option<BasicBlock>, +) -> Box<[Local]> { + let tcx = inliner.tcx(); + + // There is a bit of a mismatch between the *caller* of a closure and the *callee*. + // The caller provides the arguments wrapped up in a tuple: + // + // tuple_tmp = (a, b, c) + // Fn::call(closure_ref, tuple_tmp) + // + // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`) + // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has + // the job of unpacking this tuple. But here, we are codegen. =) So we want to create + // a vector like + // + // [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2] + // + // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient + // if we "spill" that into *another* temporary, so that we can map the argument + // variable in the callee MIR directly to an argument variable on our side. + // So we introduce temporaries like: + // + // tmp0 = tuple_tmp.0 + // tmp1 = tuple_tmp.1 + // tmp2 = tuple_tmp.2 + // + // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`. + if callsite.fn_sig.abi() == ExternAbi::RustCall && callee_body.spread_arg.is_none() { + // FIXME(edition_2024): switch back to a normal method call. + let mut args = <_>::into_iter(args); + let self_ = create_temp_if_necessary( + inliner, + args.next().unwrap().node, + callsite, + caller_body, + return_block, + ); + let tuple = create_temp_if_necessary( + inliner, + args.next().unwrap().node, + callsite, + caller_body, + return_block, + ); + assert!(args.next().is_none()); + + let tuple = Place::from(tuple); + let ty::Tuple(tuple_tys) = tuple.ty(caller_body, tcx).ty.kind() else { + bug!("Closure arguments are not passed as a tuple"); + }; + + // The `closure_ref` in our example above. + let closure_ref_arg = iter::once(self_); + + // The `tmp0`, `tmp1`, and `tmp2` in our example above. + let tuple_tmp_args = tuple_tys.iter().enumerate().map(|(i, ty)| { + // This is e.g., `tuple_tmp.0` in our example above. + let tuple_field = Operand::Move(tcx.mk_place_field(tuple, FieldIdx::new(i), ty)); + + // Spill to a local to make e.g., `tmp0`. + create_temp_if_necessary(inliner, tuple_field, callsite, caller_body, return_block) + }); + + closure_ref_arg.chain(tuple_tmp_args).collect() + } else { + // FIXME(edition_2024): switch back to a normal method call. 
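The tuple unpacking that `make_call_args` performs mirrors how closure calls are built in the first place. The following nightly-only sketch (it assumes the `fn_traits` feature gate and is not part of this patch) shows the caller-side shape the comment above describes.

#![feature(fn_traits)]

fn main() {
    let add = |a: u32, b: u32, c: u32| a + b + c;

    // Roughly what MIR building emits for `add(1, 2, 3)` under the
    // "rust-call" ABI: one tuple operand rather than three scalar arguments.
    let tuple_tmp = (1u32, 2u32, 3u32);
    let sum = std::ops::Fn::call(&add, tuple_tmp);

    // The inliner undoes this packing by spilling each field into its own
    // temporary (tmp0 = tuple_tmp.0, ...), one per callee argument local.
    assert_eq!(sum, 6);
}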
+ <_>::into_iter(args) + .map(|a| create_temp_if_necessary(inliner, a.node, callsite, caller_body, return_block)) + .collect() + } +} - local +/// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh temporary `T` and an +/// instruction `T = arg`, and returns `T`. +fn create_temp_if_necessary<'tcx, I: Inliner<'tcx>>( + inliner: &I, + arg: Operand<'tcx>, + callsite: &CallSite<'tcx>, + caller_body: &mut Body<'tcx>, + return_block: Option<BasicBlock>, +) -> Local { + // Reuse the operand if it is a moved temporary. + if let Operand::Move(place) = &arg + && let Some(local) = place.as_local() + && caller_body.local_kind(local) == LocalKind::Temp + { + return local; + } + + // Otherwise, create a temporary for the argument. + trace!("creating temp for argument {:?}", arg); + let arg_ty = arg.ty(caller_body, inliner.tcx()); + let local = new_call_temp(caller_body, callsite, arg_ty, return_block); + caller_body[callsite.block].statements.push(Statement { + source_info: callsite.source_info, + kind: StatementKind::Assign(Box::new((Place::from(local), Rvalue::Use(arg)))), + }); + local +} + +/// Introduces a new temporary into the caller body that is live for the duration of the call. +fn new_call_temp<'tcx>( + caller_body: &mut Body<'tcx>, + callsite: &CallSite<'tcx>, + ty: Ty<'tcx>, + return_block: Option<BasicBlock>, +) -> Local { + let local = caller_body.local_decls.push(LocalDecl::new(ty, callsite.source_info.span)); + + caller_body[callsite.block].statements.push(Statement { + source_info: callsite.source_info, + kind: StatementKind::StorageLive(local), + }); + + if let Some(block) = return_block { + caller_body[block].statements.insert(0, Statement { + source_info: callsite.source_info, + kind: StatementKind::StorageDead(local), + }); } + + local } /** diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs index e1fba9be5bb..350929ffaa5 100644 --- a/compiler/rustc_mir_transform/src/lib.rs +++ b/compiler/rustc_mir_transform/src/lib.rs @@ -141,7 +141,7 @@ declare_passes! { mod gvn : GVN; // Made public so that `mir_drops_elaborated_and_const_checked` can be overridden // by custom rustc drivers, running all the steps by themselves. See #114628. - pub mod inline : Inline; + pub mod inline : Inline, ForceInline; mod instsimplify : InstSimplify { BeforeInline, AfterSimplifyCfg }; mod jump_threading : JumpThreading; mod known_panics_lint : KnownPanicsLint; @@ -488,7 +488,9 @@ fn mir_drops_elaborated_and_const_checked(tcx: TyCtxt<'_>, def: LocalDefId) -> & let is_fn_like = tcx.def_kind(def).is_fn_like(); if is_fn_like { // Do not compute the mir call graph without said call graph actually being used. - if pm::should_run_pass(tcx, &inline::Inline) { + if pm::should_run_pass(tcx, &inline::Inline) + || inline::ForceInline::should_run_pass_for_callee(tcx, def.to_def_id()) + { tcx.ensure_with_value().mir_inliner_callees(ty::InstanceKind::Item(def.to_def_id())); } } @@ -664,6 +666,8 @@ fn run_optimization_passes<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { // Perform instsimplify before inline to eliminate some trivial calls (like clone // shims). &instsimplify::InstSimplify::BeforeInline, + // Perform inlining of `#[rustc_force_inline]`-annotated callees. + &inline::ForceInline, // Perform inlining, which may add a lot of code. 
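For orientation, here is a sketch of the user-facing feature these pass-list changes serve. The attribute spelling and its `rustc_attrs` feature gate are assumptions based on the rest of this patch series, not something shown in this diff.

#![feature(rustc_attrs)]

// Assumed spelling of the internal, nightly-only attribute; the string is
// presumably surfaced in the diagnostic if inlining turns out to be impossible.
#[rustc_force_inline = "must not be an out-of-line call"]
fn must_inline(x: u32) -> u32 {
    x.wrapping_mul(3)
}

pub fn caller(x: u32) -> u32 {
    // The new ForceInline pass runs before the general Inline pass, so this
    // call should never survive into optimized MIR; the validate.rs change
    // below checks exactly that.
    must_inline(x)
}

fn main() {
    assert_eq!(caller(2), 6);
}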
&inline::Inline, // Code from other crates may have storage markers, so this needs to happen after diff --git a/compiler/rustc_mir_transform/src/pass_manager.rs b/compiler/rustc_mir_transform/src/pass_manager.rs index 8a45ce0762d..c3f0a989ce1 100644 --- a/compiler/rustc_mir_transform/src/pass_manager.rs +++ b/compiler/rustc_mir_transform/src/pass_manager.rs @@ -79,6 +79,12 @@ pub(super) trait MirPass<'tcx> { true } + /// Returns `true` if this pass can be overridden by `-Zenable-mir-passes`. This should be + /// true for basically every pass other than those that are necessary for correctness. + fn can_be_overridden(&self) -> bool { + true + } + fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>); fn is_mir_dump_enabled(&self) -> bool { @@ -176,6 +182,10 @@ where { let name = pass.name(); + if !pass.can_be_overridden() { + return pass.is_enabled(tcx.sess); + } + let overridden_passes = &tcx.sess.opts.unstable_opts.mir_enable_passes; let overridden = overridden_passes.iter().rev().find(|(s, _)| s == &*name).map(|(_name, polarity)| { diff --git a/compiler/rustc_mir_transform/src/validate.rs b/compiler/rustc_mir_transform/src/validate.rs index a670da94fcc..9444c5b61dd 100644 --- a/compiler/rustc_mir_transform/src/validate.rs +++ b/compiler/rustc_mir_transform/src/validate.rs @@ -1,6 +1,7 @@ //! Validates the MIR to ensure that invariants are upheld. use rustc_abi::{ExternAbi, FIRST_VARIANT, Size}; +use rustc_attr_parsing::InlineAttr; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_hir::LangItem; use rustc_index::IndexVec; @@ -79,7 +80,7 @@ impl<'tcx> crate::MirPass<'tcx> for Validator { cfg_checker.fail(location, msg); } - if let MirPhase::Runtime(_) = body.phase { + if let MirPhase::Runtime(phase) = body.phase { if let ty::InstanceKind::Item(_) = body.source.instance { if body.has_free_regions() { cfg_checker.fail( @@ -88,6 +89,27 @@ impl<'tcx> crate::MirPass<'tcx> for Validator { ); } } + + if phase >= RuntimePhase::Optimized + && body + .basic_blocks + .iter() + .filter_map(|bb| match &bb.terminator().kind { + TerminatorKind::Call { func, .. } + | TerminatorKind::TailCall { func, .. } => Some(func), + _ => None, + }) + .filter_map(|func| match func.ty(&body.local_decls, tcx).kind() { + ty::FnDef(did, ..) => Some(did), + _ => None, + }) + .any(|did| matches!(tcx.codegen_fn_attrs(did).inline, InlineAttr::Force { .. })) + { + cfg_checker.fail( + Location::START, + "`#[rustc_force_inline]`-annotated function not inlined", + ); + } } } } |
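A minimal model of the `should_run_pass` behaviour after the pass_manager.rs change above: a pass that reports `can_be_overridden() == false` (presumably the new mandatory ForceInline pass) ignores `-Zmir-enable-passes` entirely, while for every other pass the last matching override wins. The types and names below are made up for illustration; this is not rustc's code.

// Toy re-implementation for illustration only.
fn should_run(
    pass_name: &str,
    enabled_by_default: bool,
    can_be_overridden: bool,
    overrides: &[(&str, bool)], // in command-line order
) -> bool {
    if !can_be_overridden {
        return enabled_by_default;
    }
    overrides
        .iter()
        .rev() // the last -Zmir-enable-passes entry for a pass wins
        .find(|(name, _)| *name == pass_name)
        .map(|&(_, polarity)| polarity)
        .unwrap_or(enabled_by_default)
}

fn main() {
    let overrides = [("Inline", false), ("Inline", true)];
    assert!(should_run("Inline", false, true, &overrides)); // re-enabled by the last flag
    assert!(should_run("ForceInline", true, false, &overrides)); // override machinery skipped
}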

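Finally, a toy model of the new validator invariant from validate.rs above: once a body reaches the optimized runtime phase, no call terminator may still target a function whose inline attribute is `Force`. The types below are stand-ins for illustration, not rustc's.

#[derive(PartialEq)]
enum InlineAttr {
    Hint,
    Force,
}

struct Callee {
    inline: InlineAttr,
}

enum Terminator<'a> {
    Call { callee: &'a Callee },
    Return,
}

// Mirrors the shape of the check: scan every block's terminator and fail
// validation if any surviving call targets a force-inline callee.
fn check_no_forced_calls(terminators: &[Terminator<'_>]) -> Result<(), &'static str> {
    let survived = terminators
        .iter()
        .any(|t| matches!(t, Terminator::Call { callee } if callee.inline == InlineAttr::Force));
    if survived {
        Err("`#[rustc_force_inline]`-annotated function not inlined")
    } else {
        Ok(())
    }
}

fn main() {
    let hinted = Callee { inline: InlineAttr::Hint };
    let forced = Callee { inline: InlineAttr::Force };
    assert!(check_no_forced_calls(&[Terminator::Call { callee: &hinted }, Terminator::Return]).is_ok());
    assert!(check_no_forced_calls(&[Terminator::Call { callee: &forced }]).is_err());
}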