Diffstat (limited to 'compiler')
24 files changed, 1138 insertions, 670 deletions
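As context for the patch below, a sketch of how the new attribute is exercised, based on the `template!(Word, NameValueStr: "reason")` form and the `rustc_attrs` feature gate added in this diff; the function names and reason string are hypothetical:

    #![feature(rustc_attrs)]

    // The MIR inliner must inline every call to `hot`; if it cannot,
    // compilation fails with the `mir_transform_force_inline` error.
    #[rustc_force_inline]
    pub fn hot(x: u32) -> u32 { x.wrapping_mul(3) }

    // The name-value form records a justification, surfaced in the error
    // via the `mir_transform_force_inline_justification` note.
    #[rustc_force_inline = "called on the interpreter hot path"]
    pub fn hotter(x: u32) -> u32 { hot(x) + 1 }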
diff --git a/compiler/rustc_attr_data_structures/src/attributes.rs b/compiler/rustc_attr_data_structures/src/attributes.rs index 8986bec57de..def6b16ee8a 100644 --- a/compiler/rustc_attr_data_structures/src/attributes.rs +++ b/compiler/rustc_attr_data_structures/src/attributes.rs @@ -11,6 +11,22 @@ pub enum InlineAttr { Hint, Always, Never, + /// `#[rustc_force_inline]` forces inlining to happen in the MIR inliner - it reports an error + /// if the inlining cannot happen. It is limited to only free functions so that the calls + /// can always be resolved. + Force { + attr_span: Span, + reason: Option<Symbol>, + }, +} + +impl InlineAttr { + pub fn always(&self) -> bool { + match self { + InlineAttr::Always | InlineAttr::Force { .. } => true, + InlineAttr::None | InlineAttr::Hint | InlineAttr::Never => false, + } + } } #[derive(Clone, Encodable, Decodable, Debug, PartialEq, Eq, HashStable_Generic)] diff --git a/compiler/rustc_codegen_gcc/src/attributes.rs b/compiler/rustc_codegen_gcc/src/attributes.rs index 028a5ab5f71..69b04dd5796 100644 --- a/compiler/rustc_codegen_gcc/src/attributes.rs +++ b/compiler/rustc_codegen_gcc/src/attributes.rs @@ -20,7 +20,7 @@ fn inline_attr<'gcc, 'tcx>( ) -> Option<FnAttribute<'gcc>> { match inline { InlineAttr::Hint => Some(FnAttribute::Inline), - InlineAttr::Always => Some(FnAttribute::AlwaysInline), + InlineAttr::Always | InlineAttr::Force { .. } => Some(FnAttribute::AlwaysInline), InlineAttr::Never => { if cx.sess().target.arch != "amdgpu" { Some(FnAttribute::NoInline) diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index f8454fd9960..0250064e55f 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -37,7 +37,9 @@ fn inline_attr<'ll>(cx: &CodegenCx<'ll, '_>, inline: InlineAttr) -> Option<&'ll } match inline { InlineAttr::Hint => Some(AttributeKind::InlineHint.create_attr(cx.llcx)), - InlineAttr::Always => Some(AttributeKind::AlwaysInline.create_attr(cx.llcx)), + InlineAttr::Always | InlineAttr::Force { .. 
} => { + Some(AttributeKind::AlwaysInline.create_attr(cx.llcx)) + } InlineAttr::Never => { if cx.sess().target.arch != "amdgpu" { Some(AttributeKind::NoInline.create_attr(cx.llcx)) diff --git a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs index cdb72aba36f..d8f62d543ad 100644 --- a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs +++ b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs @@ -18,6 +18,7 @@ use rustc_session::parse::feature_err; use rustc_session::{Session, lint}; use rustc_span::{Ident, Span, sym}; use rustc_target::spec::{SanitizerSet, abi}; +use tracing::debug; use crate::errors; use crate::target_features::{check_target_feature_trait_unsafe, from_target_feature_attr}; @@ -522,26 +523,36 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { mixed_export_name_no_mangle_lint_state.lint_if_mixed(tcx); codegen_fn_attrs.inline = attrs.iter().fold(InlineAttr::None, |ia, attr| { - if !attr.has_name(sym::inline) { - return ia; - } - if attr.is_word() { - InlineAttr::Hint - } else if let Some(ref items) = attr.meta_item_list() { - inline_span = Some(attr.span); - if items.len() != 1 { - struct_span_code_err!(tcx.dcx(), attr.span, E0534, "expected one argument").emit(); - InlineAttr::None - } else if list_contains_name(items, sym::always) { - InlineAttr::Always - } else if list_contains_name(items, sym::never) { - InlineAttr::Never - } else { - struct_span_code_err!(tcx.dcx(), items[0].span(), E0535, "invalid argument") - .with_help("valid inline arguments are `always` and `never`") - .emit(); + if attr.has_name(sym::inline) { + if attr.is_word() { + InlineAttr::Hint + } else if let Some(ref items) = attr.meta_item_list() { + inline_span = Some(attr.span); + if items.len() != 1 { + struct_span_code_err!(tcx.dcx(), attr.span, E0534, "expected one argument").emit(); + InlineAttr::None + } else if list_contains_name(items, sym::always) { + InlineAttr::Always + } else if list_contains_name(items, sym::never) { + InlineAttr::Never + } else { + struct_span_code_err!(tcx.dcx(), items[0].span(), E0535, "invalid argument") + .with_help("valid inline arguments are `always` and `never`") + .emit(); - InlineAttr::None + InlineAttr::None + } + } else { + ia + } + } else if attr.has_name(sym::rustc_force_inline) && tcx.features().rustc_attrs() { + if attr.is_word() { + InlineAttr::Force { attr_span: attr.span, reason: None } + } else if let Some(val) = attr.value_str() { + InlineAttr::Force { attr_span: attr.span, reason: Some(val) } + } else { + debug!("`rustc_force_inline` not checked by attribute validation"); + ia } } else { ia @@ -596,7 +607,7 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { // is probably a poor usage of `#[inline(always)]` and easily avoided by not using the attribute. if tcx.features().target_feature_11() && tcx.is_closure_like(did.to_def_id()) - && codegen_fn_attrs.inline != InlineAttr::Always + && !codegen_fn_attrs.inline.always() { let owner_id = tcx.parent(did.to_def_id()); if tcx.def_kind(owner_id).has_codegen_attrs() { @@ -606,11 +617,15 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { } } - // If a function uses #[target_feature] it can't be inlined into general + // If a function uses `#[target_feature]` it can't be inlined into general // purpose functions as they wouldn't have the right target features - // enabled. For that reason we also forbid #[inline(always)] as it can't be + // enabled. 
For that reason we also forbid `#[inline(always)]` as it can't be // respected. - if !codegen_fn_attrs.target_features.is_empty() && codegen_fn_attrs.inline == InlineAttr::Always + // + // `#[rustc_force_inline]` doesn't need to be prohibited here, as it + // is implemented entirely in rustc, which can attempt to inline and emit an error if it cannot. + if !codegen_fn_attrs.target_features.is_empty() + && matches!(codegen_fn_attrs.inline, InlineAttr::Always) { if let Some(span) = inline_span { tcx.dcx().span_err( @@ -621,7 +636,7 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs { } } - if !codegen_fn_attrs.no_sanitize.is_empty() && codegen_fn_attrs.inline == InlineAttr::Always { + if !codegen_fn_attrs.no_sanitize.is_empty() && codegen_fn_attrs.inline.always() { if let (Some(no_sanitize_span), Some(inline_span)) = (no_sanitize_span, inline_span) { let hir_id = tcx.local_def_id_to_hir_id(did); tcx.node_span_lint( diff --git a/compiler/rustc_feature/src/builtin_attrs.rs b/compiler/rustc_feature/src/builtin_attrs.rs index 3c3bbc8c31d..c28a4360f6f 100644 --- a/compiler/rustc_feature/src/builtin_attrs.rs +++ b/compiler/rustc_feature/src/builtin_attrs.rs @@ -1019,6 +1019,10 @@ pub const BUILTIN_ATTRIBUTES: &[BuiltinAttribute] = &[ rustc_no_mir_inline, Normal, template!(Word), WarnFollowing, EncodeCrossCrate::Yes, "#[rustc_no_mir_inline] prevents the MIR inliner from inlining a function while not affecting codegen" ), + rustc_attr!( + rustc_force_inline, Normal, template!(Word, NameValueStr: "reason"), WarnFollowing, EncodeCrossCrate::Yes, + "#[rustc_force_inline] forces a free function to be inlined" + ), // ========================================================================== // Internal attributes, Testing: diff --git a/compiler/rustc_hir_typeck/src/coercion.rs b/compiler/rustc_hir_typeck/src/coercion.rs index bd26be11279..9ebc7a4657e 100644 --- a/compiler/rustc_hir_typeck/src/coercion.rs +++ b/compiler/rustc_hir_typeck/src/coercion.rs @@ -38,6 +38,7 @@ use std::ops::Deref; use rustc_abi::ExternAbi; +use rustc_attr_parsing::InlineAttr; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, struct_span_code_err}; use rustc_hir as hir; @@ -926,8 +927,13 @@ impl<'f, 'tcx> Coerce<'f, 'tcx> { return Err(TypeError::IntrinsicCast); } - // Safe `#[target_feature]` functions are not assignable to safe fn pointers (RFC 2396). + let fn_attrs = self.tcx.codegen_fn_attrs(def_id); + if matches!(fn_attrs.inline, InlineAttr::Force { .. }) { + return Err(TypeError::ForceInlineCast); + } + // Safe `#[target_feature]` functions are not assignable to safe fn pointers + // (RFC 2396). if b_hdr.safety.is_safe() && !self.tcx.codegen_fn_attrs(def_id).target_features.is_empty() { @@ -1197,6 +1203,17 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { return Ok(prev_ty); } + let is_force_inline = |ty: Ty<'tcx>| { + if let ty::FnDef(did, _) = ty.kind() { + matches!(self.tcx.codegen_fn_attrs(did).inline, InlineAttr::Force { .. }) + } else { + false + } + }; + if is_force_inline(prev_ty) || is_force_inline(new_ty) { + return Err(TypeError::ForceInlineCast); + } + // Special-case that coercion alone cannot handle: // Function items or non-capturing closures of differing IDs or GenericArgs.
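// Illustrative sketch (not part of the patch): the `TypeError::ForceInlineCast`
// paths added above mean a force-inlined function can no longer coerce to a
// function pointer, since an indirect call could never be inlined by the MIR
// inliner. A hypothetical program that is now rejected:
//
//     #[rustc_force_inline]
//     fn f() {}
//     let p: fn() = f;
//     // error: cannot coerce functions which must be inlined to function pointers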
let (a_sig, b_sig) = { diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs index 27168b2a9f2..111c3b6956a 100644 --- a/compiler/rustc_middle/src/mir/mono.rs +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -132,9 +132,10 @@ impl<'tcx> MonoItem<'tcx> { // creating one copy of this `#[inline]` function which may // conflict with upstream crates as it could be an exported // symbol. - match tcx.codegen_fn_attrs(instance.def_id()).inline { - InlineAttr::Always => InstantiationMode::LocalCopy, - _ => InstantiationMode::GloballyShared { may_conflict: true }, + if tcx.codegen_fn_attrs(instance.def_id()).inline.always() { + InstantiationMode::LocalCopy + } else { + InstantiationMode::GloballyShared { may_conflict: true } } } MonoItem::Static(..) | MonoItem::GlobalAsm(..) => { diff --git a/compiler/rustc_middle/src/ty/error.rs b/compiler/rustc_middle/src/ty/error.rs index 714094db053..35fbaa99569 100644 --- a/compiler/rustc_middle/src/ty/error.rs +++ b/compiler/rustc_middle/src/ty/error.rs @@ -109,6 +109,9 @@ impl<'tcx> TypeError<'tcx> { TypeError::ConstMismatch(ref values) => { format!("expected `{}`, found `{}`", values.expected, values.found).into() } + TypeError::ForceInlineCast => { + "cannot coerce functions which must be inlined to function pointers".into() + } TypeError::IntrinsicCast => "cannot coerce intrinsics to function pointers".into(), TypeError::TargetFeatureCast(_) => { "cannot coerce functions with `#[target_feature]` to safe function pointers".into() diff --git a/compiler/rustc_mir_transform/messages.ftl b/compiler/rustc_mir_transform/messages.ftl index d00bfc66a6a..b0c023cca82 100644 --- a/compiler/rustc_mir_transform/messages.ftl +++ b/compiler/rustc_mir_transform/messages.ftl @@ -19,6 +19,17 @@ mir_transform_ffi_unwind_call = call to {$foreign -> mir_transform_fn_item_ref = taking a reference to a function item does not give a function pointer .suggestion = cast `{$ident}` to obtain a function pointer +mir_transform_force_inline = + `{$callee}` could not be inlined into `{$caller}` but is required to be inlined + .call = ...`{$callee}` called here + .attr = inlining due to this annotation + .caller = within `{$caller}`... + .callee = `{$callee}` defined here + .note = could not be inlined due to: {$reason} + +mir_transform_force_inline_justification = + `{$callee}` is required to be inlined to: {$sym} + mir_transform_must_not_suspend = {$pre}`{$def_path}`{$post} held across a suspend point, but should not be .label = the value is held across this suspend point .note = {$reason} diff --git a/compiler/rustc_mir_transform/src/cross_crate_inline.rs b/compiler/rustc_mir_transform/src/cross_crate_inline.rs index e1f1dd83f0d..8fce856687c 100644 --- a/compiler/rustc_mir_transform/src/cross_crate_inline.rs +++ b/compiler/rustc_mir_transform/src/cross_crate_inline.rs @@ -46,7 +46,7 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool { // #[inline(never)] to force code generation. match codegen_fn_attrs.inline { InlineAttr::Never => return false, - InlineAttr::Hint | InlineAttr::Always => return true, + InlineAttr::Hint | InlineAttr::Always | InlineAttr::Force { .. } => return true, _ => {} } @@ -69,8 +69,9 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool { // Don't do any inference if codegen optimizations are disabled and also MIR inlining is not // enabled. This ensures that we do inference even if someone only passes -Zinline-mir, // which is less confusing than having to also enable -Copt-level=1. 
- if matches!(tcx.sess.opts.optimize, OptLevel::No) && !pm::should_run_pass(tcx, &inline::Inline) - { + let inliner_will_run = pm::should_run_pass(tcx, &inline::Inline) + || inline::ForceInline::should_run_pass_for_callee(tcx, def_id.to_def_id()); + if matches!(tcx.sess.opts.optimize, OptLevel::No) && !inliner_will_run { return false; } diff --git a/compiler/rustc_mir_transform/src/errors.rs b/compiler/rustc_mir_transform/src/errors.rs index 2d9eeddea2e..015633d145f 100644 --- a/compiler/rustc_mir_transform/src/errors.rs +++ b/compiler/rustc_mir_transform/src/errors.rs @@ -4,8 +4,8 @@ use rustc_macros::{Diagnostic, LintDiagnostic, Subdiagnostic}; use rustc_middle::mir::AssertKind; use rustc_middle::ty::TyCtxt; use rustc_session::lint::{self, Lint}; -use rustc_span::Span; use rustc_span::def_id::DefId; +use rustc_span::{Span, Symbol}; use crate::fluent_generated as fluent; @@ -142,3 +142,29 @@ pub(crate) struct MustNotSuspendReason { #[note(mir_transform_note2)] #[help] pub(crate) struct UndefinedTransmute; + +#[derive(Diagnostic)] +#[diag(mir_transform_force_inline)] +#[note] +pub(crate) struct ForceInlineFailure { + #[label(mir_transform_caller)] + pub caller_span: Span, + #[label(mir_transform_callee)] + pub callee_span: Span, + #[label(mir_transform_attr)] + pub attr_span: Span, + #[primary_span] + #[label(mir_transform_call)] + pub call_span: Span, + pub callee: String, + pub caller: String, + pub reason: &'static str, + #[subdiagnostic] + pub justification: Option<ForceInlineJustification>, +} + +#[derive(Subdiagnostic)] +#[note(mir_transform_force_inline_justification)] +pub(crate) struct ForceInlineJustification { + pub sym: Symbol, +} diff --git a/compiler/rustc_mir_transform/src/inline.rs b/compiler/rustc_mir_transform/src/inline.rs index 339acbad6b9..a5a4b5987f5 100644 --- a/compiler/rustc_mir_transform/src/inline.rs +++ b/compiler/rustc_mir_transform/src/inline.rs @@ -6,7 +6,7 @@ use std::ops::{Range, RangeFrom}; use rustc_abi::{ExternAbi, FieldIdx}; use rustc_attr_parsing::InlineAttr; use rustc_hir::def::DefKind; -use rustc_hir::def_id::DefId; +use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::Idx; use rustc_index::bit_set::BitSet; use rustc_middle::bug; @@ -29,10 +29,6 @@ pub(crate) mod cycle; const TOP_DOWN_DEPTH_LIMIT: usize = 5; -// Made public so that `mir_drops_elaborated_and_const_checked` can be overridden -// by custom rustc drivers, running all the steps by themselves. See #114628. -pub struct Inline; - #[derive(Clone, Debug)] struct CallSite<'tcx> { callee: Instance<'tcx>, @@ -41,14 +37,12 @@ struct CallSite<'tcx> { source_info: SourceInfo, } +// Made public so that `mir_drops_elaborated_and_const_checked` can be overridden +// by custom rustc drivers, running all the steps by themselves. See #114628. +pub struct Inline; + impl<'tcx> crate::MirPass<'tcx> for Inline { fn is_enabled(&self, sess: &rustc_session::Session) -> bool { - // FIXME(#127234): Coverage instrumentation currently doesn't handle inlined - // MIR correctly when Modified Condition/Decision Coverage is enabled. 
- if sess.instrument_coverage_mcdc() { - return false; - } - if let Some(enabled) = sess.opts.unstable_opts.inline_mir { return enabled; } @@ -67,7 +61,7 @@ impl<'tcx> crate::MirPass<'tcx> for Inline { fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { let span = trace_span!("inline", body = %tcx.def_path_str(body.source.def_id())); let _guard = span.enter(); - if inline(tcx, body) { + if inline::<NormalInliner<'tcx>>(tcx, body) { debug!("running simplify cfg on {:?}", body.source); simplify_cfg(body); deref_finder(tcx, body); @@ -75,47 +69,84 @@ impl<'tcx> crate::MirPass<'tcx> for Inline { } } -fn inline<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool { - let def_id = body.source.def_id().expect_local(); +pub struct ForceInline; - // Only do inlining into fn bodies. - if !tcx.hir().body_owner_kind(def_id).is_fn_or_closure() { - return false; +impl ForceInline { + pub fn should_run_pass_for_callee<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> bool { + matches!(tcx.codegen_fn_attrs(def_id).inline, InlineAttr::Force { .. }) } - if body.source.promoted.is_some() { - return false; +} + +impl<'tcx> crate::MirPass<'tcx> for ForceInline { + fn is_enabled(&self, _: &rustc_session::Session) -> bool { + true } - // Avoid inlining into coroutines, since their `optimized_mir` is used for layout computation, - // which can create a cycle, even when no attempt is made to inline the function in the other - // direction. - if body.coroutine.is_some() { - return false; + + fn can_be_overridden(&self) -> bool { + false } - let typing_env = body.typing_env(tcx); - let codegen_fn_attrs = tcx.codegen_fn_attrs(def_id); + fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { + let span = trace_span!("force_inline", body = %tcx.def_path_str(body.source.def_id())); + let _guard = span.enter(); + if inline::<ForceInliner<'tcx>>(tcx, body) { + debug!("running simplify cfg on {:?}", body.source); + simplify_cfg(body); + deref_finder(tcx, body); + } + } +} - let mut this = Inliner { - tcx, - typing_env, - codegen_fn_attrs, - history: Vec::new(), - changed: false, - caller_is_inline_forwarder: matches!( - codegen_fn_attrs.inline, - InlineAttr::Hint | InlineAttr::Always - ) && body_is_forwarder(body), - }; - let blocks = START_BLOCK..body.basic_blocks.next_index(); - this.process_blocks(body, blocks); - this.changed +trait Inliner<'tcx> { + fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Self; + + fn tcx(&self) -> TyCtxt<'tcx>; + fn typing_env(&self) -> ty::TypingEnv<'tcx>; + fn history(&self) -> &[DefId]; + fn caller_def_id(&self) -> LocalDefId; + + /// Has the caller body been changed? + fn changed(self) -> bool; + + /// Should inlining happen for a given callee? + fn should_inline_for_callee(&self, def_id: DefId) -> bool; + + fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool; + + /// Returns inlining decision that is based on the examination of callee MIR body. + /// Assumes that codegen attributes have been checked for compatibility already. + fn check_callee_mir_body( + &self, + callsite: &CallSite<'tcx>, + callee_body: &Body<'tcx>, + callee_attrs: &CodegenFnAttrs, + cross_crate_inlinable: bool, + ) -> Result<(), &'static str>; + + // How many callsites in a body are we allowed to inline? We need to limit this in order + // to prevent super-linear growth in MIR size. + fn inline_limit_for_block(&self) -> Option<usize>; + + /// Called when inlining succeeds. 
+ fn on_inline_success( + &mut self, + callsite: &CallSite<'tcx>, + caller_body: &mut Body<'tcx>, + new_blocks: std::ops::Range<BasicBlock>, + ); + + /// Called when inlining failed or was not performed. + fn on_inline_failure(&self, callsite: &CallSite<'tcx>, reason: &'static str); + + /// Called when the inline limit for a body is reached. + fn on_inline_limit_reached(&self) -> bool; } -struct Inliner<'tcx> { +struct ForceInliner<'tcx> { tcx: TyCtxt<'tcx>, typing_env: ty::TypingEnv<'tcx>, - /// Caller codegen attributes. - codegen_fn_attrs: &'tcx CodegenFnAttrs, + /// `DefId` of caller. + def_id: LocalDefId, /// Stack of inlined instances. /// We only check the `DefId` and not the args because we want to /// avoid inlining cases of polymorphic recursion. @@ -124,366 +155,190 @@ struct Inliner<'tcx> { history: Vec<DefId>, /// Indicates that the caller body has been modified. changed: bool, - /// Indicates that the caller is #[inline] and just calls another function, - /// and thus we can inline less into it as it'll be inlined itself. - caller_is_inline_forwarder: bool, } -impl<'tcx> Inliner<'tcx> { - fn process_blocks(&mut self, caller_body: &mut Body<'tcx>, blocks: Range<BasicBlock>) { - // How many callsites in this body are we allowed to inline? We need to limit this in order - // to prevent super-linear growth in MIR size - let inline_limit = match self.history.len() { - 0 => usize::MAX, - 1..=TOP_DOWN_DEPTH_LIMIT => 1, - _ => return, - }; - let mut inlined_count = 0; - for bb in blocks { - let bb_data = &caller_body[bb]; - if bb_data.is_cleanup { - continue; - } - - let Some(callsite) = self.resolve_callsite(caller_body, bb, bb_data) else { - continue; - }; - - let span = trace_span!("process_blocks", %callsite.callee, ?bb); - let _guard = span.enter(); - - match self.try_inlining(caller_body, &callsite) { - Err(reason) => { - debug!("not-inlined {} [{}]", callsite.callee, reason); - } - Ok(new_blocks) => { - debug!("inlined {}", callsite.callee); - self.changed = true; - - self.history.push(callsite.callee.def_id()); - self.process_blocks(caller_body, new_blocks); - self.history.pop(); - - inlined_count += 1; - if inlined_count == inline_limit { - debug!("inline count reached"); - return; - } - } - } - } +impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> { + fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Self { + Self { tcx, typing_env: body.typing_env(tcx), def_id, history: Vec::new(), changed: false } } - /// Attempts to inline a callsite into the caller body. When successful returns basic blocks - /// containing the inlined body. Otherwise returns an error describing why inlining didn't take - /// place. - fn try_inlining( - &self, - caller_body: &mut Body<'tcx>, - callsite: &CallSite<'tcx>, - ) -> Result<std::ops::Range<BasicBlock>, &'static str> { - self.check_mir_is_available(caller_body, callsite.callee)?; - - let callee_attrs = self.tcx.codegen_fn_attrs(callsite.callee.def_id()); - let cross_crate_inlinable = self.tcx.cross_crate_inlinable(callsite.callee.def_id()); - self.check_codegen_attributes(callsite, callee_attrs, cross_crate_inlinable)?; - - // Intrinsic fallback bodies are automatically made cross-crate inlineable, - // but at this stage we don't know whether codegen knows the intrinsic, - // so just conservatively don't inline it. This also ensures that we do not - // accidentally inline the body of an intrinsic that *must* be overridden. 
- if self.tcx.has_attr(callsite.callee.def_id(), sym::rustc_intrinsic) { - return Err("Callee is an intrinsic, do not inline fallback bodies"); - } - - let terminator = caller_body[callsite.block].terminator.as_ref().unwrap(); - let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() }; - let destination_ty = destination.ty(&caller_body.local_decls, self.tcx).ty; - for arg in args { - if !arg.node.ty(&caller_body.local_decls, self.tcx).is_sized(self.tcx, self.typing_env) - { - // We do not allow inlining functions with unsized params. Inlining these functions - // could create unsized locals, which are unsound and being phased out. - return Err("Call has unsized argument"); - } - } - - let callee_body = try_instance_mir(self.tcx, callsite.callee.def)?; - self.check_mir_body(callsite, callee_body, callee_attrs, cross_crate_inlinable)?; - - let Ok(callee_body) = callsite.callee.try_instantiate_mir_and_normalize_erasing_regions( - self.tcx, - self.typing_env, - ty::EarlyBinder::bind(callee_body.clone()), - ) else { - return Err("failed to normalize callee body"); - }; - - // Normally, this shouldn't be required, but trait normalization failure can create a - // validation ICE. - if !validate_types(self.tcx, self.typing_env, &callee_body, &caller_body).is_empty() { - return Err("failed to validate callee body"); - } - - // Check call signature compatibility. - // Normally, this shouldn't be required, but trait normalization failure can create a - // validation ICE. - let output_type = callee_body.return_ty(); - if !util::sub_types(self.tcx, self.typing_env, output_type, destination_ty) { - trace!(?output_type, ?destination_ty); - return Err("failed to normalize return type"); - } - if callsite.fn_sig.abi() == ExternAbi::RustCall { - // FIXME: Don't inline user-written `extern "rust-call"` functions, - // since this is generally perf-negative on rustc, and we hope that - // LLVM will inline these functions instead. - if callee_body.spread_arg.is_some() { - return Err("do not inline user-written rust-call functions"); - } + fn tcx(&self) -> TyCtxt<'tcx> { + self.tcx + } - let (self_arg, arg_tuple) = match &args[..] 
{ - [arg_tuple] => (None, arg_tuple), - [self_arg, arg_tuple] => (Some(self_arg), arg_tuple), - _ => bug!("Expected `rust-call` to have 1 or 2 args"), - }; + fn typing_env(&self) -> ty::TypingEnv<'tcx> { + self.typing_env + } - let self_arg_ty = - self_arg.map(|self_arg| self_arg.node.ty(&caller_body.local_decls, self.tcx)); + fn history(&self) -> &[DefId] { + &self.history + } - let arg_tuple_ty = arg_tuple.node.ty(&caller_body.local_decls, self.tcx); - let ty::Tuple(arg_tuple_tys) = *arg_tuple_ty.kind() else { - bug!("Closure arguments are not passed as a tuple"); - }; + fn caller_def_id(&self) -> LocalDefId { + self.def_id + } - for (arg_ty, input) in - self_arg_ty.into_iter().chain(arg_tuple_tys).zip(callee_body.args_iter()) - { - let input_type = callee_body.local_decls[input].ty; - if !util::sub_types(self.tcx, self.typing_env, input_type, arg_ty) { - trace!(?arg_ty, ?input_type); - return Err("failed to normalize tuple argument type"); - } - } - } else { - for (arg, input) in args.iter().zip(callee_body.args_iter()) { - let input_type = callee_body.local_decls[input].ty; - let arg_ty = arg.node.ty(&caller_body.local_decls, self.tcx); - if !util::sub_types(self.tcx, self.typing_env, input_type, arg_ty) { - trace!(?arg_ty, ?input_type); - return Err("failed to normalize argument type"); - } - } - } + fn changed(self) -> bool { + self.changed + } - let old_blocks = caller_body.basic_blocks.next_index(); - self.inline_call(caller_body, callsite, callee_body); - let new_blocks = old_blocks..caller_body.basic_blocks.next_index(); + fn should_inline_for_callee(&self, def_id: DefId) -> bool { + ForceInline::should_run_pass_for_callee(self.tcx(), def_id) + } - Ok(new_blocks) + fn check_caller_mir_body(&self, _: &Body<'tcx>) -> bool { + true } - fn check_mir_is_available( + #[instrument(level = "debug", skip(self, callee_body))] + fn check_callee_mir_body( &self, - caller_body: &Body<'tcx>, - callee: Instance<'tcx>, + _: &CallSite<'tcx>, + callee_body: &Body<'tcx>, + _: &CodegenFnAttrs, + _: bool, ) -> Result<(), &'static str> { - let caller_def_id = caller_body.source.def_id(); - let callee_def_id = callee.def_id(); - if callee_def_id == caller_def_id { - return Err("self-recursion"); - } - - match callee.def { - InstanceKind::Item(_) => { - // If there is no MIR available (either because it was not in metadata or - // because it has no MIR because it's an extern function), then the inliner - // won't cause cycles on this. - if !self.tcx.is_mir_available(callee_def_id) { - return Err("item MIR unavailable"); - } - } - // These have no own callable MIR. - InstanceKind::Intrinsic(_) | InstanceKind::Virtual(..) => { - return Err("instance without MIR (intrinsic / virtual)"); - } - - // FIXME(#127030): `ConstParamHasTy` has bad interactions with - // the drop shim builder, which does not evaluate predicates in - // the correct param-env for types being dropped. Stall resolving - // the MIR for this instance until all of its const params are - // substituted. - InstanceKind::DropGlue(_, Some(ty)) if ty.has_type_flags(TypeFlags::HAS_CT_PARAM) => { - return Err("still needs substitution"); - } - - // This cannot result in an immediate cycle since the callee MIR is a shim, which does - // not get any optimizations run on it. Any subsequent inlining may cause cycles, but we - // do not need to catch this here, we can wait until the inliner decides to continue - // inlining a second time. - InstanceKind::VTableShim(_) - | InstanceKind::ReifyShim(..) - | InstanceKind::FnPtrShim(..) 
- | InstanceKind::ClosureOnceShim { .. } - | InstanceKind::ConstructCoroutineInClosureShim { .. } - | InstanceKind::DropGlue(..) - | InstanceKind::CloneShim(..) - | InstanceKind::ThreadLocalShim(..) - | InstanceKind::FnPtrAddrShim(..) - | InstanceKind::AsyncDropGlueCtorShim(..) => return Ok(()), - } + if let Some(_) = callee_body.tainted_by_errors { Err("body has errors") } else { Ok(()) } + } - if self.tcx.is_constructor(callee_def_id) { - trace!("constructors always have MIR"); - // Constructor functions cannot cause a query cycle. - return Ok(()); - } + fn inline_limit_for_block(&self) -> Option<usize> { + Some(usize::MAX) + } - if callee_def_id.is_local() { - // If we know for sure that the function we're calling will itself try to - // call us, then we avoid inlining that function. - if self.tcx.mir_callgraph_reachable((callee, caller_def_id.expect_local())) { - return Err("caller might be reachable from callee (query cycle avoidance)"); - } + fn on_inline_success( + &mut self, + callsite: &CallSite<'tcx>, + caller_body: &mut Body<'tcx>, + new_blocks: std::ops::Range<BasicBlock>, + ) { + self.changed = true; - Ok(()) - } else { - // This cannot result in an immediate cycle since the callee MIR is from another crate - // and is already optimized. Any subsequent inlining may cause cycles, but we do - // not need to catch this here, we can wait until the inliner decides to continue - // inlining a second time. - trace!("functions from other crates always have MIR"); - Ok(()) - } + self.history.push(callsite.callee.def_id()); + process_blocks(self, caller_body, new_blocks); + self.history.pop(); } - fn resolve_callsite( - &self, - caller_body: &Body<'tcx>, - bb: BasicBlock, - bb_data: &BasicBlockData<'tcx>, - ) -> Option<CallSite<'tcx>> { - // Only consider direct calls to functions - let terminator = bb_data.terminator(); - - // FIXME(explicit_tail_calls): figure out if we can inline tail calls - if let TerminatorKind::Call { ref func, fn_span, .. } = terminator.kind { - let func_ty = func.ty(caller_body, self.tcx); - if let ty::FnDef(def_id, args) = *func_ty.kind() { - // To resolve an instance its args have to be fully normalized. - let args = self.tcx.try_normalize_erasing_regions(self.typing_env, args).ok()?; - let callee = Instance::try_resolve(self.tcx, self.typing_env, def_id, args) - .ok() - .flatten()?; - - if let InstanceKind::Virtual(..) | InstanceKind::Intrinsic(_) = callee.def { - return None; - } - - if self.history.contains(&callee.def_id()) { - return None; - } + fn on_inline_failure(&self, callsite: &CallSite<'tcx>, reason: &'static str) { + let tcx = self.tcx(); + let InlineAttr::Force { attr_span, reason: justification } = + tcx.codegen_fn_attrs(callsite.callee.def_id()).inline + else { + bug!("called on item without required inlining"); + }; - let fn_sig = self.tcx.fn_sig(def_id).instantiate(self.tcx, args); + let call_span = callsite.source_info.span; + tcx.dcx().emit_err(crate::errors::ForceInlineFailure { + call_span, + attr_span, + caller_span: tcx.def_span(self.def_id), + caller: tcx.def_path_str(self.def_id), + callee_span: tcx.def_span(callsite.callee.def_id()), + callee: tcx.def_path_str(callsite.callee.def_id()), + reason, + justification: justification.map(|sym| crate::errors::ForceInlineJustification { sym }), + }); + } - // Additionally, check that the body that we're inlining actually agrees - // with the ABI of the trait that the item comes from. 
- if let InstanceKind::Item(instance_def_id) = callee.def - && self.tcx.def_kind(instance_def_id) == DefKind::AssocFn - && let instance_fn_sig = self.tcx.fn_sig(instance_def_id).skip_binder() - && instance_fn_sig.abi() != fn_sig.abi() - { - return None; - } + fn on_inline_limit_reached(&self) -> bool { + false + } +} - let source_info = SourceInfo { span: fn_span, ..terminator.source_info }; +struct NormalInliner<'tcx> { + tcx: TyCtxt<'tcx>, + typing_env: ty::TypingEnv<'tcx>, + /// `DefId` of caller. + def_id: LocalDefId, + /// Stack of inlined instances. + /// We only check the `DefId` and not the args because we want to + /// avoid inlining cases of polymorphic recursion. + /// The number of `DefId`s is finite, so checking history is enough + /// to ensure that we do not loop endlessly while inlining. + history: Vec<DefId>, + /// Indicates that the caller body has been modified. + changed: bool, + /// Indicates that the caller is #[inline] and just calls another function, + /// and thus we can inline less into it as it'll be inlined itself. + caller_is_inline_forwarder: bool, +} - return Some(CallSite { callee, fn_sig, block: bb, source_info }); - } +impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> { + fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Self { + let typing_env = body.typing_env(tcx); + let codegen_fn_attrs = tcx.codegen_fn_attrs(def_id); + + Self { + tcx, + typing_env, + def_id, + history: Vec::new(), + changed: false, + caller_is_inline_forwarder: matches!( + codegen_fn_attrs.inline, + InlineAttr::Hint | InlineAttr::Always | InlineAttr::Force { .. } + ) && body_is_forwarder(body), } - - None } - /// Returns an error if inlining is not possible based on codegen attributes alone. A success - /// indicates that inlining decision should be based on other criteria. - fn check_codegen_attributes( - &self, - callsite: &CallSite<'tcx>, - callee_attrs: &CodegenFnAttrs, - cross_crate_inlinable: bool, - ) -> Result<(), &'static str> { - if self.tcx.has_attr(callsite.callee.def_id(), sym::rustc_no_mir_inline) { - return Err("#[rustc_no_mir_inline]"); - } + fn tcx(&self) -> TyCtxt<'tcx> { + self.tcx + } - if let InlineAttr::Never = callee_attrs.inline { - return Err("never inline hint"); - } + fn caller_def_id(&self) -> LocalDefId { + self.def_id + } - // Reachability pass defines which functions are eligible for inlining. Generally inlining - // other functions is incorrect because they could reference symbols that aren't exported. - let is_generic = callsite.callee.args.non_erasable_generics().next().is_some(); - if !is_generic && !cross_crate_inlinable { - return Err("not exported"); - } + fn typing_env(&self) -> ty::TypingEnv<'tcx> { + self.typing_env + } - if callsite.fn_sig.c_variadic() { - return Err("C variadic"); - } + fn history(&self) -> &[DefId] { + &self.history + } - if callee_attrs.flags.contains(CodegenFnAttrFlags::COLD) { - return Err("cold"); - } + fn changed(self) -> bool { + self.changed + } - if callee_attrs.no_sanitize != self.codegen_fn_attrs.no_sanitize { - return Err("incompatible sanitizer set"); - } + fn should_inline_for_callee(&self, _: DefId) -> bool { + true + } - // Two functions are compatible if the callee has no attribute (meaning - // that it's codegen agnostic), or sets an attribute that is identical - // to this function's attribute. 
- if callee_attrs.instruction_set.is_some() - && callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set - { - return Err("incompatible instruction set"); + fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool { + if body.source.promoted.is_some() { + return false; } - let callee_feature_names = callee_attrs.target_features.iter().map(|f| f.name); - let this_feature_names = self.codegen_fn_attrs.target_features.iter().map(|f| f.name); - if callee_feature_names.ne(this_feature_names) { - // In general it is not correct to inline a callee with target features that are a - // subset of the caller. This is because the callee might contain calls, and the ABI of - // those calls depends on the target features of the surrounding function. By moving a - // `Call` terminator from one MIR body to another with more target features, we might - // change the ABI of that call! - return Err("incompatible target features"); + // Avoid inlining into coroutines, since their `optimized_mir` is used for layout computation, + // which can create a cycle, even when no attempt is made to inline the function in the other + // direction. + if body.coroutine.is_some() { + return false; } - Ok(()) + true } - /// Returns inlining decision that is based on the examination of callee MIR body. - /// Assumes that codegen attributes have been checked for compatibility already. #[instrument(level = "debug", skip(self, callee_body))] - fn check_mir_body( + fn check_callee_mir_body( &self, callsite: &CallSite<'tcx>, callee_body: &Body<'tcx>, callee_attrs: &CodegenFnAttrs, cross_crate_inlinable: bool, ) -> Result<(), &'static str> { - let tcx = self.tcx; + let tcx = self.tcx(); if let Some(_) = callee_body.tainted_by_errors { - return Err("Body is tainted"); + return Err("body has errors"); } let mut threshold = if self.caller_is_inline_forwarder { - self.tcx.sess.opts.unstable_opts.inline_mir_forwarder_threshold.unwrap_or(30) + tcx.sess.opts.unstable_opts.inline_mir_forwarder_threshold.unwrap_or(30) } else if cross_crate_inlinable { - self.tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100) + tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100) } else { - self.tcx.sess.opts.unstable_opts.inline_mir_threshold.unwrap_or(50) + tcx.sess.opts.unstable_opts.inline_mir_threshold.unwrap_or(50) }; // Give a bonus functions with a small number of blocks, @@ -497,7 +352,7 @@ impl<'tcx> Inliner<'tcx> { // FIXME: Give a bonus to functions with only a single caller let mut checker = - CostChecker::new(self.tcx, self.typing_env, Some(callsite.callee), callee_body); + CostChecker::new(tcx, self.typing_env(), Some(callsite.callee), callee_body); checker.add_function_level_costs(); @@ -513,20 +368,20 @@ impl<'tcx> Inliner<'tcx> { checker.visit_basic_block_data(bb, blk); let term = blk.terminator(); + let caller_attrs = tcx.codegen_fn_attrs(self.caller_def_id()); if let TerminatorKind::Drop { ref place, target, unwind, replace: _ } = term.kind { work_list.push(target); // If the place doesn't actually need dropping, treat it like a regular goto. 
- let ty = callsite.callee.instantiate_mir( - self.tcx, - ty::EarlyBinder::bind(&place.ty(callee_body, tcx).ty), - ); - if ty.needs_drop(tcx, self.typing_env) + let ty = callsite + .callee + .instantiate_mir(tcx, ty::EarlyBinder::bind(&place.ty(callee_body, tcx).ty)); + if ty.needs_drop(tcx, self.typing_env()) && let UnwindAction::Cleanup(unwind) = unwind { work_list.push(unwind); } - } else if callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set + } else if callee_attrs.instruction_set != caller_attrs.instruction_set && matches!(term.kind, TerminatorKind::InlineAsm { .. }) { // During the attribute checking stage we allow a callee with no @@ -534,7 +389,7 @@ impl<'tcx> Inliner<'tcx> { // assign one. However, during this stage we require an exact match when any // inline-asm is detected. LLVM will still possibly do an inline later on // if the no-attribute function ends up with the same instruction set anyway. - return Err("Cannot move inline-asm across instruction sets"); + return Err("cannot move inline-asm across instruction sets"); } else if let TerminatorKind::TailCall { .. } = term.kind { // FIXME(explicit_tail_calls): figure out how exactly functions containing tail // calls can be inlined (and if they even should) @@ -558,321 +413,710 @@ impl<'tcx> Inliner<'tcx> { } } - fn inline_call( - &self, - caller_body: &mut Body<'tcx>, + fn inline_limit_for_block(&self) -> Option<usize> { + match self.history.len() { + 0 => Some(usize::MAX), + 1..=TOP_DOWN_DEPTH_LIMIT => Some(1), + _ => None, + } + } + + fn on_inline_success( + &mut self, callsite: &CallSite<'tcx>, - mut callee_body: Body<'tcx>, + caller_body: &mut Body<'tcx>, + new_blocks: std::ops::Range<BasicBlock>, ) { - let terminator = caller_body[callsite.block].terminator.take().unwrap(); - let TerminatorKind::Call { func, args, destination, unwind, target, .. } = terminator.kind - else { - bug!("unexpected terminator kind {:?}", terminator.kind); - }; + self.changed = true; - let return_block = if let Some(block) = target { - // Prepare a new block for code that should execute when call returns. We don't use - // target block directly since it might have other predecessors. - let data = BasicBlockData::new( - Some(Terminator { - source_info: terminator.source_info, - kind: TerminatorKind::Goto { target: block }, - }), - caller_body[block].is_cleanup, - ); - Some(caller_body.basic_blocks_mut().push(data)) - } else { - None + self.history.push(callsite.callee.def_id()); + process_blocks(self, caller_body, new_blocks); + self.history.pop(); + } + + fn on_inline_limit_reached(&self) -> bool { + true + } + + fn on_inline_failure(&self, _: &CallSite<'tcx>, _: &'static str) {} +} + +fn inline<'tcx, T: Inliner<'tcx>>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool { + let def_id = body.source.def_id().expect_local(); + + // Only do inlining into fn bodies. 
+ if !tcx.hir().body_owner_kind(def_id).is_fn_or_closure() { + return false; + } + + let mut inliner = T::new(tcx, def_id, body); + if !inliner.check_caller_mir_body(body) { + return false; + } + + let blocks = START_BLOCK..body.basic_blocks.next_index(); + process_blocks(&mut inliner, body, blocks); + inliner.changed() +} + +fn process_blocks<'tcx, I: Inliner<'tcx>>( + inliner: &mut I, + caller_body: &mut Body<'tcx>, + blocks: Range<BasicBlock>, +) { + let Some(inline_limit) = inliner.inline_limit_for_block() else { return }; + let mut inlined_count = 0; + for bb in blocks { + let bb_data = &caller_body[bb]; + if bb_data.is_cleanup { + continue; + } + + let Some(callsite) = resolve_callsite(inliner, caller_body, bb, bb_data) else { + continue; }; - // If the call is something like `a[*i] = f(i)`, where - // `i : &mut usize`, then just duplicating the `a[*i]` - // Place could result in two different locations if `f` - // writes to `i`. To prevent this we need to create a temporary - // borrow of the place and pass the destination as `*temp` instead. - fn dest_needs_borrow(place: Place<'_>) -> bool { - for elem in place.projection.iter() { - match elem { - ProjectionElem::Deref | ProjectionElem::Index(_) => return true, - _ => {} + let span = trace_span!("process_blocks", %callsite.callee, ?bb); + let _guard = span.enter(); + + if !inliner.should_inline_for_callee(callsite.callee.def_id()) { + debug!("not enabled"); + continue; + } + + match try_inlining(inliner, caller_body, &callsite) { + Err(reason) => { + debug!("not-inlined {} [{}]", callsite.callee, reason); + inliner.on_inline_failure(&callsite, reason); + } + Ok(new_blocks) => { + debug!("inlined {}", callsite.callee); + inliner.on_inline_success(&callsite, caller_body, new_blocks); + + inlined_count += 1; + if inlined_count == inline_limit { + if inliner.on_inline_limit_reached() { + return; + } } } + } + } +} + +fn resolve_callsite<'tcx, I: Inliner<'tcx>>( + inliner: &I, + caller_body: &Body<'tcx>, + bb: BasicBlock, + bb_data: &BasicBlockData<'tcx>, +) -> Option<CallSite<'tcx>> { + let tcx = inliner.tcx(); + // Only consider direct calls to functions + let terminator = bb_data.terminator(); + + // FIXME(explicit_tail_calls): figure out if we can inline tail calls + if let TerminatorKind::Call { ref func, fn_span, .. } = terminator.kind { + let func_ty = func.ty(caller_body, tcx); + if let ty::FnDef(def_id, args) = *func_ty.kind() { + // To resolve an instance its args have to be fully normalized. + let args = tcx.try_normalize_erasing_regions(inliner.typing_env(), args).ok()?; + let callee = + Instance::try_resolve(tcx, inliner.typing_env(), def_id, args).ok().flatten()?; + + if let InstanceKind::Virtual(..) | InstanceKind::Intrinsic(_) = callee.def { + return None; + } - false + if inliner.history().contains(&callee.def_id()) { + return None; + } + + let fn_sig = tcx.fn_sig(def_id).instantiate(tcx, args); + + // Additionally, check that the body that we're inlining actually agrees + // with the ABI of the trait that the item comes from. 
+ if let InstanceKind::Item(instance_def_id) = callee.def + && tcx.def_kind(instance_def_id) == DefKind::AssocFn + && let instance_fn_sig = tcx.fn_sig(instance_def_id).skip_binder() + && instance_fn_sig.abi() != fn_sig.abi() + { + return None; + } + + let source_info = SourceInfo { span: fn_span, ..terminator.source_info }; + + return Some(CallSite { callee, fn_sig, block: bb, source_info }); } + } - let dest = if dest_needs_borrow(destination) { - trace!("creating temp for return destination"); - let dest = Rvalue::Ref( - self.tcx.lifetimes.re_erased, - BorrowKind::Mut { kind: MutBorrowKind::Default }, - destination, - ); - let dest_ty = dest.ty(caller_body, self.tcx); - let temp = - Place::from(self.new_call_temp(caller_body, callsite, dest_ty, return_block)); - caller_body[callsite.block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::Assign(Box::new((temp, dest))), - }); - self.tcx.mk_place_deref(temp) - } else { - destination - }; + None +} - // Always create a local to hold the destination, as `RETURN_PLACE` may appear - // where a full `Place` is not allowed. - let (remap_destination, destination_local) = if let Some(d) = dest.as_local() { - (false, d) - } else { - ( - true, - self.new_call_temp( - caller_body, - callsite, - destination.ty(caller_body, self.tcx).ty, - return_block, - ), - ) - }; +/// Attempts to inline a callsite into the caller body. When successful returns basic blocks +/// containing the inlined body. Otherwise returns an error describing why inlining didn't take +/// place. +fn try_inlining<'tcx, I: Inliner<'tcx>>( + inliner: &I, + caller_body: &mut Body<'tcx>, + callsite: &CallSite<'tcx>, +) -> Result<std::ops::Range<BasicBlock>, &'static str> { + let tcx = inliner.tcx(); + check_mir_is_available(inliner, caller_body, callsite.callee)?; + + let callee_attrs = tcx.codegen_fn_attrs(callsite.callee.def_id()); + let cross_crate_inlinable = tcx.cross_crate_inlinable(callsite.callee.def_id()); + check_codegen_attributes(inliner, callsite, callee_attrs, cross_crate_inlinable)?; + + // Intrinsic fallback bodies are automatically made cross-crate inlineable, + // but at this stage we don't know whether codegen knows the intrinsic, + // so just conservatively don't inline it. This also ensures that we do not + // accidentally inline the body of an intrinsic that *must* be overridden. + if tcx.has_attr(callsite.callee.def_id(), sym::rustc_intrinsic) { + return Err("callee is an intrinsic"); + } - // Copy the arguments if needed. - let args = self.make_call_args(args, callsite, caller_body, &callee_body, return_block); + let terminator = caller_body[callsite.block].terminator.as_ref().unwrap(); + let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() }; + let destination_ty = destination.ty(&caller_body.local_decls, tcx).ty; + for arg in args { + if !arg.node.ty(&caller_body.local_decls, tcx).is_sized(tcx, inliner.typing_env()) { + // We do not allow inlining functions with unsized params. Inlining these functions + // could create unsized locals, which are unsound and being phased out. 
+ return Err("call has unsized argument"); + } + } - let mut integrator = Integrator { - args: &args, - new_locals: Local::new(caller_body.local_decls.len()).., - new_scopes: SourceScope::new(caller_body.source_scopes.len()).., - new_blocks: BasicBlock::new(caller_body.basic_blocks.len()).., - destination: destination_local, - callsite_scope: caller_body.source_scopes[callsite.source_info.scope].clone(), - callsite, - cleanup_block: unwind, - in_cleanup_block: false, - return_block, - tcx: self.tcx, - always_live_locals: BitSet::new_filled(callee_body.local_decls.len()), + let callee_body = try_instance_mir(tcx, callsite.callee.def)?; + inliner.check_callee_mir_body(callsite, callee_body, callee_attrs, cross_crate_inlinable)?; + + let Ok(callee_body) = callsite.callee.try_instantiate_mir_and_normalize_erasing_regions( + tcx, + inliner.typing_env(), + ty::EarlyBinder::bind(callee_body.clone()), + ) else { + debug!("failed to normalize callee body"); + return Err("implementation limitation"); + }; + + // Normally, this shouldn't be required, but trait normalization failure can create a + // validation ICE. + if !validate_types(tcx, inliner.typing_env(), &callee_body, &caller_body).is_empty() { + debug!("failed to validate callee body"); + return Err("implementation limitation"); + } + + // Check call signature compatibility. + // Normally, this shouldn't be required, but trait normalization failure can create a + // validation ICE. + let output_type = callee_body.return_ty(); + if !util::sub_types(tcx, inliner.typing_env(), output_type, destination_ty) { + trace!(?output_type, ?destination_ty); + debug!("failed to normalize return type"); + return Err("implementation limitation"); + } + if callsite.fn_sig.abi() == ExternAbi::RustCall { + // FIXME: Don't inline user-written `extern "rust-call"` functions, + // since this is generally perf-negative on rustc, and we hope that + // LLVM will inline these functions instead. + if callee_body.spread_arg.is_some() { + return Err("user-written rust-call functions"); + } + + let (self_arg, arg_tuple) = match &args[..] { + [arg_tuple] => (None, arg_tuple), + [self_arg, arg_tuple] => (Some(self_arg), arg_tuple), + _ => bug!("Expected `rust-call` to have 1 or 2 args"), }; - // Map all `Local`s, `SourceScope`s and `BasicBlock`s to new ones - // (or existing ones, in a few special cases) in the caller. - integrator.visit_body(&mut callee_body); + let self_arg_ty = self_arg.map(|self_arg| self_arg.node.ty(&caller_body.local_decls, tcx)); - // If there are any locals without storage markers, give them storage only for the - // duration of the call. 
- for local in callee_body.vars_and_temps_iter() { - if integrator.always_live_locals.contains(local) { - let new_local = integrator.map_local(local); - caller_body[callsite.block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::StorageLive(new_local), - }); + let arg_tuple_ty = arg_tuple.node.ty(&caller_body.local_decls, tcx); + let ty::Tuple(arg_tuple_tys) = *arg_tuple_ty.kind() else { + bug!("Closure arguments are not passed as a tuple"); + }; + + for (arg_ty, input) in + self_arg_ty.into_iter().chain(arg_tuple_tys).zip(callee_body.args_iter()) + { + let input_type = callee_body.local_decls[input].ty; + if !util::sub_types(tcx, inliner.typing_env(), input_type, arg_ty) { + trace!(?arg_ty, ?input_type); + debug!("failed to normalize tuple argument type"); + return Err("implementation limitation"); } } - if let Some(block) = return_block { - // To avoid repeated O(n) insert, push any new statements to the end and rotate - // the slice once. - let mut n = 0; - if remap_destination { - caller_body[block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::Assign(Box::new(( - dest, - Rvalue::Use(Operand::Move(destination_local.into())), - ))), - }); - n += 1; + } else { + for (arg, input) in args.iter().zip(callee_body.args_iter()) { + let input_type = callee_body.local_decls[input].ty; + let arg_ty = arg.node.ty(&caller_body.local_decls, tcx); + if !util::sub_types(tcx, inliner.typing_env(), input_type, arg_ty) { + trace!(?arg_ty, ?input_type); + debug!("failed to normalize argument type"); + return Err("implementation limitation"); } - for local in callee_body.vars_and_temps_iter().rev() { - if integrator.always_live_locals.contains(local) { - let new_local = integrator.map_local(local); - caller_body[block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::StorageDead(new_local), - }); - n += 1; - } + } + } + + let old_blocks = caller_body.basic_blocks.next_index(); + inline_call(inliner, caller_body, callsite, callee_body); + let new_blocks = old_blocks..caller_body.basic_blocks.next_index(); + + Ok(new_blocks) +} + +fn check_mir_is_available<'tcx, I: Inliner<'tcx>>( + inliner: &I, + caller_body: &Body<'tcx>, + callee: Instance<'tcx>, +) -> Result<(), &'static str> { + let caller_def_id = caller_body.source.def_id(); + let callee_def_id = callee.def_id(); + if callee_def_id == caller_def_id { + return Err("self-recursion"); + } + + match callee.def { + InstanceKind::Item(_) => { + // If there is no MIR available (either because it was not in metadata or + // because it has no MIR because it's an extern function), then the inliner + // won't cause cycles on this. + if !inliner.tcx().is_mir_available(callee_def_id) { + debug!("item MIR unavailable"); + return Err("implementation limitation"); } - caller_body[block].statements.rotate_right(n); + } + // These have no own callable MIR. + InstanceKind::Intrinsic(_) | InstanceKind::Virtual(..) => { + debug!("instance without MIR (intrinsic / virtual)"); + return Err("implementation limitation"); } - // Insert all of the (mapped) parts of the callee body into the caller. 
- caller_body.local_decls.extend(callee_body.drain_vars_and_temps()); - caller_body.source_scopes.append(&mut callee_body.source_scopes); - if self - .tcx - .sess - .opts - .unstable_opts - .inline_mir_preserve_debug - .unwrap_or(self.tcx.sess.opts.debuginfo != DebugInfo::None) - { - // Note that we need to preserve these in the standard library so that - // people working on rust can build with or without debuginfo while - // still getting consistent results from the mir-opt tests. - caller_body.var_debug_info.append(&mut callee_body.var_debug_info); + // FIXME(#127030): `ConstParamHasTy` has bad interactions with + // the drop shim builder, which does not evaluate predicates in + // the correct param-env for types being dropped. Stall resolving + // the MIR for this instance until all of its const params are + // substituted. + InstanceKind::DropGlue(_, Some(ty)) if ty.has_type_flags(TypeFlags::HAS_CT_PARAM) => { + debug!("still needs substitution"); + return Err("implementation limitation"); } - caller_body.basic_blocks_mut().append(callee_body.basic_blocks_mut()); - caller_body[callsite.block].terminator = Some(Terminator { - source_info: callsite.source_info, - kind: TerminatorKind::Goto { target: integrator.map_block(START_BLOCK) }, - }); + // This cannot result in an immediate cycle since the callee MIR is a shim, which does + // not get any optimizations run on it. Any subsequent inlining may cause cycles, but we + // do not need to catch this here, we can wait until the inliner decides to continue + // inlining a second time. + InstanceKind::VTableShim(_) + | InstanceKind::ReifyShim(..) + | InstanceKind::FnPtrShim(..) + | InstanceKind::ClosureOnceShim { .. } + | InstanceKind::ConstructCoroutineInClosureShim { .. } + | InstanceKind::DropGlue(..) + | InstanceKind::CloneShim(..) + | InstanceKind::ThreadLocalShim(..) + | InstanceKind::FnPtrAddrShim(..) + | InstanceKind::AsyncDropGlueCtorShim(..) => return Ok(()), + } - // Copy required constants from the callee_body into the caller_body. Although we are only - // pushing unevaluated consts to `required_consts`, here they may have been evaluated - // because we are calling `instantiate_and_normalize_erasing_regions` -- so we filter again. - caller_body.required_consts.as_mut().unwrap().extend( - callee_body.required_consts().into_iter().filter(|ct| ct.const_.is_required_const()), - ); - // Now that we incorporated the callee's `required_consts`, we can remove the callee from - // `mentioned_items` -- but we have to take their `mentioned_items` in return. This does - // some extra work here to save the monomorphization collector work later. It helps a lot, - // since monomorphization can avoid a lot of work when the "mentioned items" are similar to - // the actually used items. By doing this we can entirely avoid visiting the callee! - // We need to reconstruct the `required_item` for the callee so that we can find and - // remove it. - let callee_item = MentionedItem::Fn(func.ty(caller_body, self.tcx)); - let caller_mentioned_items = caller_body.mentioned_items.as_mut().unwrap(); - if let Some(idx) = caller_mentioned_items.iter().position(|item| item.node == callee_item) { - // We found the callee, so remove it and add its items instead. - caller_mentioned_items.remove(idx); - caller_mentioned_items.extend(callee_body.mentioned_items()); - } else { - // If we can't find the callee, there's no point in adding its items. 
Probably it - // already got removed by being inlined elsewhere in the same function, so we already - // took its items. + if inliner.tcx().is_constructor(callee_def_id) { + trace!("constructors always have MIR"); + // Constructor functions cannot cause a query cycle. + return Ok(()); + } + + if callee_def_id.is_local() { + // If we know for sure that the function we're calling will itself try to + // call us, then we avoid inlining that function. + if inliner.tcx().mir_callgraph_reachable((callee, caller_def_id.expect_local())) { + debug!("query cycle avoidance"); + return Err("caller might be reachable from callee"); } + + Ok(()) + } else { + // This cannot result in an immediate cycle since the callee MIR is from another crate + // and is already optimized. Any subsequent inlining may cause cycles, but we do + // not need to catch this here, we can wait until the inliner decides to continue + // inlining a second time. + trace!("functions from other crates always have MIR"); + Ok(()) } +} - fn make_call_args( - &self, - args: Box<[Spanned<Operand<'tcx>>]>, - callsite: &CallSite<'tcx>, - caller_body: &mut Body<'tcx>, - callee_body: &Body<'tcx>, - return_block: Option<BasicBlock>, - ) -> Box<[Local]> { - let tcx = self.tcx; - - // There is a bit of a mismatch between the *caller* of a closure and the *callee*. - // The caller provides the arguments wrapped up in a tuple: - // - // tuple_tmp = (a, b, c) - // Fn::call(closure_ref, tuple_tmp) - // - // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`) - // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has - // the job of unpacking this tuple. But here, we are codegen. =) So we want to create - // a vector like - // - // [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2] - // - // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient - // if we "spill" that into *another* temporary, so that we can map the argument - // variable in the callee MIR directly to an argument variable on our side. - // So we introduce temporaries like: - // - // tmp0 = tuple_tmp.0 - // tmp1 = tuple_tmp.1 - // tmp2 = tuple_tmp.2 - // - // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`. - if callsite.fn_sig.abi() == ExternAbi::RustCall && callee_body.spread_arg.is_none() { - // FIXME(edition_2024): switch back to a normal method call. - let mut args = <_>::into_iter(args); - let self_ = self.create_temp_if_necessary( - args.next().unwrap().node, - callsite, - caller_body, - return_block, - ); - let tuple = self.create_temp_if_necessary( - args.next().unwrap().node, - callsite, - caller_body, - return_block, - ); - assert!(args.next().is_none()); - - let tuple = Place::from(tuple); - let ty::Tuple(tuple_tys) = tuple.ty(caller_body, tcx).ty.kind() else { - bug!("Closure arguments are not passed as a tuple"); - }; +/// Returns an error if inlining is not possible based on codegen attributes alone. A success +/// indicates that inlining decision should be based on other criteria. +fn check_codegen_attributes<'tcx, I: Inliner<'tcx>>( + inliner: &I, + callsite: &CallSite<'tcx>, + callee_attrs: &CodegenFnAttrs, + cross_crate_inlinable: bool, +) -> Result<(), &'static str> { + let tcx = inliner.tcx(); + if tcx.has_attr(callsite.callee.def_id(), sym::rustc_no_mir_inline) { + return Err("#[rustc_no_mir_inline]"); + } - // The `closure_ref` in our example above. 
- let closure_ref_arg = iter::once(self_); + if let InlineAttr::Never = callee_attrs.inline { + return Err("never inline attribute"); + } - // The `tmp0`, `tmp1`, and `tmp2` in our example above. - let tuple_tmp_args = tuple_tys.iter().enumerate().map(|(i, ty)| { - // This is e.g., `tuple_tmp.0` in our example above. - let tuple_field = Operand::Move(tcx.mk_place_field(tuple, FieldIdx::new(i), ty)); + // FIXME(#127234): Coverage instrumentation currently doesn't handle inlined + // MIR correctly when Modified Condition/Decision Coverage is enabled. + if tcx.sess.instrument_coverage_mcdc() { + return Err("incompatible with MC/DC coverage"); + } - // Spill to a local to make e.g., `tmp0`. - self.create_temp_if_necessary(tuple_field, callsite, caller_body, return_block) - }); + // Reachability pass defines which functions are eligible for inlining. Generally inlining + // other functions is incorrect because they could reference symbols that aren't exported. + let is_generic = callsite.callee.args.non_erasable_generics().next().is_some(); + if !is_generic && !cross_crate_inlinable { + return Err("not exported"); + } - closure_ref_arg.chain(tuple_tmp_args).collect() - } else { - // FIXME(edition_2024): switch back to a normal method call. - <_>::into_iter(args) - .map(|a| self.create_temp_if_necessary(a.node, callsite, caller_body, return_block)) - .collect() - } + if callsite.fn_sig.c_variadic() { + return Err("C variadic"); } - /// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh - /// temporary `T` and an instruction `T = arg`, and returns `T`. - fn create_temp_if_necessary( - &self, - arg: Operand<'tcx>, - callsite: &CallSite<'tcx>, - caller_body: &mut Body<'tcx>, - return_block: Option<BasicBlock>, - ) -> Local { - // Reuse the operand if it is a moved temporary. - if let Operand::Move(place) = &arg - && let Some(local) = place.as_local() - && caller_body.local_kind(local) == LocalKind::Temp - { - return local; - } + if callee_attrs.flags.contains(CodegenFnAttrFlags::COLD) { + return Err("cold"); + } - // Otherwise, create a temporary for the argument. - trace!("creating temp for argument {:?}", arg); - let arg_ty = arg.ty(caller_body, self.tcx); - let local = self.new_call_temp(caller_body, callsite, arg_ty, return_block); - caller_body[callsite.block].statements.push(Statement { - source_info: callsite.source_info, - kind: StatementKind::Assign(Box::new((Place::from(local), Rvalue::Use(arg)))), - }); - local + let codegen_fn_attrs = tcx.codegen_fn_attrs(inliner.caller_def_id()); + if callee_attrs.no_sanitize != codegen_fn_attrs.no_sanitize { + return Err("incompatible sanitizer set"); } - /// Introduces a new temporary into the caller body that is live for the duration of the call. - fn new_call_temp( - &self, - caller_body: &mut Body<'tcx>, - callsite: &CallSite<'tcx>, - ty: Ty<'tcx>, - return_block: Option<BasicBlock>, - ) -> Local { - let local = caller_body.local_decls.push(LocalDecl::new(ty, callsite.source_info.span)); + // Two functions are compatible if the callee has no attribute (meaning + // that it's codegen agnostic), or sets an attribute that is identical + // to this function's attribute. 
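Several of these attribute-based rejections are visible from ordinary user code. A hedged sketch (the function names are illustrative; the quoted strings are the rejection reasons returned above):

```rust
// Callees that `check_codegen_attributes` refuses before any cost model runs.
#[inline(never)]
fn pinned_out_of_line() {} // rejected: "never inline attribute"

#[cold]
fn error_path() {} // rejected: "cold"

fn caller() {
    pinned_out_of_line();
    error_path();
}
```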
+ if callee_attrs.instruction_set.is_some() + && callee_attrs.instruction_set != codegen_fn_attrs.instruction_set + { + return Err("incompatible instruction set"); + } + + let callee_feature_names = callee_attrs.target_features.iter().map(|f| f.name); + let this_feature_names = codegen_fn_attrs.target_features.iter().map(|f| f.name); + if callee_feature_names.ne(this_feature_names) { + // In general it is not correct to inline a callee with target features that are a + // subset of the caller. This is because the callee might contain calls, and the ABI of + // those calls depends on the target features of the surrounding function. By moving a + // `Call` terminator from one MIR body to another with more target features, we might + // change the ABI of that call! + return Err("incompatible target features"); + } + Ok(()) +} + +fn inline_call<'tcx, I: Inliner<'tcx>>( + inliner: &I, + caller_body: &mut Body<'tcx>, + callsite: &CallSite<'tcx>, + mut callee_body: Body<'tcx>, +) { + let tcx = inliner.tcx(); + let terminator = caller_body[callsite.block].terminator.take().unwrap(); + let TerminatorKind::Call { func, args, destination, unwind, target, .. } = terminator.kind + else { + bug!("unexpected terminator kind {:?}", terminator.kind); + }; + + let return_block = if let Some(block) = target { + // Prepare a new block for code that should execute when call returns. We don't use + // target block directly since it might have other predecessors. + let data = BasicBlockData::new( + Some(Terminator { + source_info: terminator.source_info, + kind: TerminatorKind::Goto { target: block }, + }), + caller_body[block].is_cleanup, + ); + Some(caller_body.basic_blocks_mut().push(data)) + } else { + None + }; + + // If the call is something like `a[*i] = f(i)`, where + // `i : &mut usize`, then just duplicating the `a[*i]` + // Place could result in two different locations if `f` + // writes to `i`. To prevent this we need to create a temporary + // borrow of the place and pass the destination as `*temp` instead. + fn dest_needs_borrow(place: Place<'_>) -> bool { + for elem in place.projection.iter() { + match elem { + ProjectionElem::Deref | ProjectionElem::Index(_) => return true, + _ => {} + } + } + + false + } + + let dest = if dest_needs_borrow(destination) { + trace!("creating temp for return destination"); + let dest = Rvalue::Ref( + tcx.lifetimes.re_erased, + BorrowKind::Mut { kind: MutBorrowKind::Default }, + destination, + ); + let dest_ty = dest.ty(caller_body, tcx); + let temp = Place::from(new_call_temp(caller_body, callsite, dest_ty, return_block)); caller_body[callsite.block].statements.push(Statement { source_info: callsite.source_info, - kind: StatementKind::StorageLive(local), + kind: StatementKind::Assign(Box::new((temp, dest))), }); + tcx.mk_place_deref(temp) + } else { + destination + }; + + // Always create a local to hold the destination, as `RETURN_PLACE` may appear + // where a full `Place` is not allowed. + let (remap_destination, destination_local) = if let Some(d) = dest.as_local() { + (false, d) + } else { + ( + true, + new_call_temp(caller_body, callsite, destination.ty(caller_body, tcx).ty, return_block), + ) + }; - if let Some(block) = return_block { - caller_body[block].statements.insert(0, Statement { + // Copy the arguments if needed. 
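To make the target-feature rule above concrete: a sketch, assuming an x86_64 target (function names are hypothetical):

```rust
// `callee` is compiled with AVX2 enabled, so calls *inside* it may use an
// ABI (e.g. SIMD values passed in AVX registers) that assumes AVX2.
// MIR-inlining `callee` into `caller` would move `helper()` into a context
// compiled without AVX2, silently changing that call's ABI -- hence the
// "incompatible target features" rejection.
#[target_feature(enable = "avx2")]
unsafe fn callee() {
    helper();
}

fn helper() {}

fn caller() {
    // Calling is fine; MIR-inlining is not.
    unsafe { callee() }
}
```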
+ let args = make_call_args(inliner, args, callsite, caller_body, &callee_body, return_block); + + let mut integrator = Integrator { + args: &args, + new_locals: Local::new(caller_body.local_decls.len()).., + new_scopes: SourceScope::new(caller_body.source_scopes.len()).., + new_blocks: BasicBlock::new(caller_body.basic_blocks.len()).., + destination: destination_local, + callsite_scope: caller_body.source_scopes[callsite.source_info.scope].clone(), + callsite, + cleanup_block: unwind, + in_cleanup_block: false, + return_block, + tcx, + always_live_locals: BitSet::new_filled(callee_body.local_decls.len()), + }; + + // Map all `Local`s, `SourceScope`s and `BasicBlock`s to new ones + // (or existing ones, in a few special cases) in the caller. + integrator.visit_body(&mut callee_body); + + // If there are any locals without storage markers, give them storage only for the + // duration of the call. + for local in callee_body.vars_and_temps_iter() { + if integrator.always_live_locals.contains(local) { + let new_local = integrator.map_local(local); + caller_body[callsite.block].statements.push(Statement { source_info: callsite.source_info, - kind: StatementKind::StorageDead(local), + kind: StatementKind::StorageLive(new_local), }); } + } + if let Some(block) = return_block { + // To avoid repeated O(n) insert, push any new statements to the end and rotate + // the slice once. + let mut n = 0; + if remap_destination { + caller_body[block].statements.push(Statement { + source_info: callsite.source_info, + kind: StatementKind::Assign(Box::new(( + dest, + Rvalue::Use(Operand::Move(destination_local.into())), + ))), + }); + n += 1; + } + for local in callee_body.vars_and_temps_iter().rev() { + if integrator.always_live_locals.contains(local) { + let new_local = integrator.map_local(local); + caller_body[block].statements.push(Statement { + source_info: callsite.source_info, + kind: StatementKind::StorageDead(new_local), + }); + n += 1; + } + } + caller_body[block].statements.rotate_right(n); + } + + // Insert all of the (mapped) parts of the callee body into the caller. + caller_body.local_decls.extend(callee_body.drain_vars_and_temps()); + caller_body.source_scopes.append(&mut callee_body.source_scopes); + if tcx + .sess + .opts + .unstable_opts + .inline_mir_preserve_debug + .unwrap_or(tcx.sess.opts.debuginfo != DebugInfo::None) + { + // Note that we need to preserve these in the standard library so that + // people working on rust can build with or without debuginfo while + // still getting consistent results from the mir-opt tests. + caller_body.var_debug_info.append(&mut callee_body.var_debug_info); + } + caller_body.basic_blocks_mut().append(callee_body.basic_blocks_mut()); + + caller_body[callsite.block].terminator = Some(Terminator { + source_info: callsite.source_info, + kind: TerminatorKind::Goto { target: integrator.map_block(START_BLOCK) }, + }); + + // Copy required constants from the callee_body into the caller_body. Although we are only + // pushing unevaluated consts to `required_consts`, here they may have been evaluated + // because we are calling `instantiate_and_normalize_erasing_regions` -- so we filter again. + caller_body.required_consts.as_mut().unwrap().extend( + callee_body.required_consts().into_iter().filter(|ct| ct.const_.is_required_const()), + ); + // Now that we incorporated the callee's `required_consts`, we can remove the callee from + // `mentioned_items` -- but we have to take their `mentioned_items` in return. 
This does + // some extra work here to save the monomorphization collector work later. It helps a lot, + // since monomorphization can avoid a lot of work when the "mentioned items" are similar to + // the actually used items. By doing this we can entirely avoid visiting the callee! + // We need to reconstruct the `required_item` for the callee so that we can find and + // remove it. + let callee_item = MentionedItem::Fn(func.ty(caller_body, tcx)); + let caller_mentioned_items = caller_body.mentioned_items.as_mut().unwrap(); + if let Some(idx) = caller_mentioned_items.iter().position(|item| item.node == callee_item) { + // We found the callee, so remove it and add its items instead. + caller_mentioned_items.remove(idx); + caller_mentioned_items.extend(callee_body.mentioned_items()); + } else { + // If we can't find the callee, there's no point in adding its items. Probably it + // already got removed by being inlined elsewhere in the same function, so we already + // took its items. + } +} + +fn make_call_args<'tcx, I: Inliner<'tcx>>( + inliner: &I, + args: Box<[Spanned<Operand<'tcx>>]>, + callsite: &CallSite<'tcx>, + caller_body: &mut Body<'tcx>, + callee_body: &Body<'tcx>, + return_block: Option<BasicBlock>, +) -> Box<[Local]> { + let tcx = inliner.tcx(); + + // There is a bit of a mismatch between the *caller* of a closure and the *callee*. + // The caller provides the arguments wrapped up in a tuple: + // + // tuple_tmp = (a, b, c) + // Fn::call(closure_ref, tuple_tmp) + // + // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`) + // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has + // the job of unpacking this tuple. But here, we are codegen. =) So we want to create + // a vector like + // + // [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2] + // + // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient + // if we "spill" that into *another* temporary, so that we can map the argument + // variable in the callee MIR directly to an argument variable on our side. + // So we introduce temporaries like: + // + // tmp0 = tuple_tmp.0 + // tmp1 = tuple_tmp.1 + // tmp2 = tuple_tmp.2 + // + // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`. + if callsite.fn_sig.abi() == ExternAbi::RustCall && callee_body.spread_arg.is_none() { + // FIXME(edition_2024): switch back to a normal method call. + let mut args = <_>::into_iter(args); + let self_ = create_temp_if_necessary( + inliner, + args.next().unwrap().node, + callsite, + caller_body, + return_block, + ); + let tuple = create_temp_if_necessary( + inliner, + args.next().unwrap().node, + callsite, + caller_body, + return_block, + ); + assert!(args.next().is_none()); + + let tuple = Place::from(tuple); + let ty::Tuple(tuple_tys) = tuple.ty(caller_body, tcx).ty.kind() else { + bug!("Closure arguments are not passed as a tuple"); + }; + + // The `closure_ref` in our example above. + let closure_ref_arg = iter::once(self_); + + // The `tmp0`, `tmp1`, and `tmp2` in our example above. + let tuple_tmp_args = tuple_tys.iter().enumerate().map(|(i, ty)| { + // This is e.g., `tuple_tmp.0` in our example above. + let tuple_field = Operand::Move(tcx.mk_place_field(tuple, FieldIdx::new(i), ty)); + + // Spill to a local to make e.g., `tmp0`. + create_temp_if_necessary(inliner, tuple_field, callsite, caller_body, return_block) + }); + + closure_ref_arg.chain(tuple_tmp_args).collect() + } else { + // FIXME(edition_2024): switch back to a normal method call. 
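At the source level, the tuple mismatch described in the long comment above comes from calling through the `Fn` traits. A small, runnable sketch:

```rust
// The generic call below uses the "rust-call" ABI: the caller packs the
// closure's arguments into a single tuple, while the closure body expects
// them as distinct parameters -- the gap that the `tmp0 = tuple_tmp.0`
// spill temporaries bridge during inlining.
fn call_it<F: Fn(u8, u16) -> u32>(f: F) -> u32 {
    // conceptually: tuple_tmp = (1, 2); Fn::call(&f, tuple_tmp)
    f(1, 2)
}

fn main() {
    assert_eq!(call_it(|a, b| u32::from(a) + u32::from(b)), 3);
}
```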
+ <_>::into_iter(args) + .map(|a| create_temp_if_necessary(inliner, a.node, callsite, caller_body, return_block)) + .collect() + } +} - local +/// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh temporary `T` and an +/// instruction `T = arg`, and returns `T`. +fn create_temp_if_necessary<'tcx, I: Inliner<'tcx>>( + inliner: &I, + arg: Operand<'tcx>, + callsite: &CallSite<'tcx>, + caller_body: &mut Body<'tcx>, + return_block: Option<BasicBlock>, +) -> Local { + // Reuse the operand if it is a moved temporary. + if let Operand::Move(place) = &arg + && let Some(local) = place.as_local() + && caller_body.local_kind(local) == LocalKind::Temp + { + return local; + } + + // Otherwise, create a temporary for the argument. + trace!("creating temp for argument {:?}", arg); + let arg_ty = arg.ty(caller_body, inliner.tcx()); + let local = new_call_temp(caller_body, callsite, arg_ty, return_block); + caller_body[callsite.block].statements.push(Statement { + source_info: callsite.source_info, + kind: StatementKind::Assign(Box::new((Place::from(local), Rvalue::Use(arg)))), + }); + local +} + +/// Introduces a new temporary into the caller body that is live for the duration of the call. +fn new_call_temp<'tcx>( + caller_body: &mut Body<'tcx>, + callsite: &CallSite<'tcx>, + ty: Ty<'tcx>, + return_block: Option<BasicBlock>, +) -> Local { + let local = caller_body.local_decls.push(LocalDecl::new(ty, callsite.source_info.span)); + + caller_body[callsite.block].statements.push(Statement { + source_info: callsite.source_info, + kind: StatementKind::StorageLive(local), + }); + + if let Some(block) = return_block { + caller_body[block].statements.insert(0, Statement { + source_info: callsite.source_info, + kind: StatementKind::StorageDead(local), + }); } + + local } /** diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs index e1fba9be5bb..350929ffaa5 100644 --- a/compiler/rustc_mir_transform/src/lib.rs +++ b/compiler/rustc_mir_transform/src/lib.rs @@ -141,7 +141,7 @@ declare_passes! { mod gvn : GVN; // Made public so that `mir_drops_elaborated_and_const_checked` can be overridden // by custom rustc drivers, running all the steps by themselves. See #114628. - pub mod inline : Inline; + pub mod inline : Inline, ForceInline; mod instsimplify : InstSimplify { BeforeInline, AfterSimplifyCfg }; mod jump_threading : JumpThreading; mod known_panics_lint : KnownPanicsLint; @@ -488,7 +488,9 @@ fn mir_drops_elaborated_and_const_checked(tcx: TyCtxt<'_>, def: LocalDefId) -> & let is_fn_like = tcx.def_kind(def).is_fn_like(); if is_fn_like { // Do not compute the mir call graph without said call graph actually being used. - if pm::should_run_pass(tcx, &inline::Inline) { + if pm::should_run_pass(tcx, &inline::Inline) + || inline::ForceInline::should_run_pass_for_callee(tcx, def.to_def_id()) + { tcx.ensure_with_value().mir_inliner_callees(ty::InstanceKind::Item(def.to_def_id())); } } @@ -664,6 +666,8 @@ fn run_optimization_passes<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { // Perform instsimplify before inline to eliminate some trivial calls (like clone // shims). &instsimplify::InstSimplify::BeforeInline, + // Perform inlining of `#[rustc_force_inline]`-annotated callees. + &inline::ForceInline, // Perform inlining, which may add a lot of code. 
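Before moving on: the return-block bookkeeping in `inline_call` above relies on a push-then-rotate idiom that is worth isolating. A self-contained sketch with plain strings standing in for MIR statements:

```rust
// Inserting k statements at the front of a block one at a time costs
// O(k * n); pushing them at the back and rotating once costs O(n + k),
// which is why the destination remap and StorageDead markers are pushed
// and then `rotate_right(n)` is applied once.
fn main() {
    let mut stmts = vec!["old0", "old1", "old2"];
    let new_stmts = ["new0", "new1"];
    let n = new_stmts.len();
    stmts.extend(new_stmts);
    stmts.rotate_right(n);
    assert_eq!(stmts, ["new0", "new1", "old0", "old1", "old2"]);
}
```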
&inline::Inline,
             // Code from other crates may have storage markers, so this needs to happen after
diff --git a/compiler/rustc_mir_transform/src/pass_manager.rs b/compiler/rustc_mir_transform/src/pass_manager.rs
index 8a45ce0762d..c3f0a989ce1 100644
--- a/compiler/rustc_mir_transform/src/pass_manager.rs
+++ b/compiler/rustc_mir_transform/src/pass_manager.rs
@@ -79,6 +79,12 @@ pub(super) trait MirPass<'tcx> {
         true
     }
 
+    /// Returns `true` if this pass can be overridden by `-Zmir-enable-passes`. This should be
+    /// true for basically every pass other than those that are necessary for correctness.
+    fn can_be_overridden(&self) -> bool {
+        true
+    }
+
     fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>);
 
     fn is_mir_dump_enabled(&self) -> bool {
@@ -176,6 +182,10 @@ where
 {
     let name = pass.name();
 
+    if !pass.can_be_overridden() {
+        return pass.is_enabled(tcx.sess);
+    }
+
     let overridden_passes = &tcx.sess.opts.unstable_opts.mir_enable_passes;
     let overridden =
         overridden_passes.iter().rev().find(|(s, _)| s == &*name).map(|(_name, polarity)| {
diff --git a/compiler/rustc_mir_transform/src/validate.rs b/compiler/rustc_mir_transform/src/validate.rs
index a670da94fcc..9444c5b61dd 100644
--- a/compiler/rustc_mir_transform/src/validate.rs
+++ b/compiler/rustc_mir_transform/src/validate.rs
@@ -1,6 +1,7 @@
 //! Validates the MIR to ensure that invariants are upheld.
 
 use rustc_abi::{ExternAbi, FIRST_VARIANT, Size};
+use rustc_attr_parsing::InlineAttr;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_hir::LangItem;
 use rustc_index::IndexVec;
@@ -79,7 +80,7 @@ impl<'tcx> crate::MirPass<'tcx> for Validator {
             cfg_checker.fail(location, msg);
         }
 
-        if let MirPhase::Runtime(_) = body.phase {
+        if let MirPhase::Runtime(phase) = body.phase {
             if let ty::InstanceKind::Item(_) = body.source.instance {
                 if body.has_free_regions() {
                     cfg_checker.fail(
@@ -88,6 +89,27 @@
                     );
                 }
             }
+
+            if phase >= RuntimePhase::Optimized
+                && body
+                    .basic_blocks
+                    .iter()
+                    .filter_map(|bb| match &bb.terminator().kind {
+                        TerminatorKind::Call { func, .. }
+                        | TerminatorKind::TailCall { func, .. } => Some(func),
+                        _ => None,
+                    })
+                    .filter_map(|func| match func.ty(&body.local_decls, tcx).kind() {
+                        ty::FnDef(did, ..) => Some(did),
+                        _ => None,
+                    })
+                    .any(|did| matches!(tcx.codegen_fn_attrs(did).inline, InlineAttr::Force { .. }))
+            {
+                cfg_checker.fail(
+                    Location::START,
+                    "`#[rustc_force_inline]`-annotated function not inlined",
+                );
+            }
         }
     }
 }
diff --git a/compiler/rustc_monomorphize/src/collector.rs b/compiler/rustc_monomorphize/src/collector.rs
index 00aae03704f..f3232a0e15d 100644
--- a/compiler/rustc_monomorphize/src/collector.rs
+++ b/compiler/rustc_monomorphize/src/collector.rs
@@ -207,6 +207,7 @@
 use std::path::PathBuf;
 
+use rustc_attr_parsing::InlineAttr;
 use rustc_data_structures::fx::FxIndexMap;
 use rustc_data_structures::sync::{LRef, MTLock, par_for_each_in};
 use rustc_data_structures::unord::{UnordMap, UnordSet};
@@ -959,6 +960,14 @@ fn should_codegen_locally<'tcx>(tcx: TyCtxtAt<'tcx>, instance: Instance<'tcx>) -
         return false;
     }
 
+    if tcx.def_kind(def_id).has_codegen_attrs()
+        && matches!(tcx.codegen_fn_attrs(def_id).inline, InlineAttr::Force { .. })
+    {
+        // `#[rustc_force_inline]` items should never be codegened. This should be caught by
+        // the MIR validator.
+        return false;
+    }
+
     if def_id.is_local() {
         // Local items cannot be referred to locally without monomorphizing them locally.
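The `can_be_overridden` hook above can be emulated outside the compiler to show the intended behaviour. A plain-Rust model (the struct and names are invented for the sketch; only the gating logic mirrors `should_run_pass`):

```rust
// A pass that opts out of overrides ignores `-Zmir-enable-passes` entirely,
// so a correctness-critical pass like ForceInline cannot be switched off.
struct Pass {
    name: &'static str,
    enabled_by_default: bool,
    can_be_overridden: bool,
}

fn should_run(pass: &Pass, overrides: &[(&str, bool)]) -> bool {
    if !pass.can_be_overridden {
        return pass.enabled_by_default;
    }
    // The last matching override wins, mirroring `overridden_passes.iter().rev()`.
    overrides
        .iter()
        .rev()
        .copied()
        .find(|(name, _)| *name == pass.name)
        .map(|(_, polarity)| polarity)
        .unwrap_or(pass.enabled_by_default)
}

fn main() {
    let force_inline =
        Pass { name: "ForceInline", enabled_by_default: true, can_be_overridden: false };
    // Even an explicit `-Zmir-enable-passes=-ForceInline` has no effect.
    assert!(should_run(&force_inline, &[("ForceInline", false)]));
}
```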
return true;
diff --git a/compiler/rustc_passes/messages.ftl b/compiler/rustc_passes/messages.ftl
index f39bea2a56f..9df396cbf7a 100644
--- a/compiler/rustc_passes/messages.ftl
+++ b/compiler/rustc_passes/messages.ftl
@@ -656,6 +656,14 @@ passes_rustc_allow_const_fn_unstable =
 passes_rustc_dirty_clean =
     attribute requires -Z query-dep-graph to be enabled
 
+passes_rustc_force_inline =
+    attribute should be applied to a function
+    .label = not a function definition
+
+passes_rustc_force_inline_coro =
+    attribute cannot be applied to an `async`, `gen` or `async gen` function
+    .label = `async`, `gen` or `async gen` function
+
 passes_rustc_layout_scalar_valid_range_arg =
     expected exactly one integer literal argument
 
diff --git a/compiler/rustc_passes/src/check_attr.rs b/compiler/rustc_passes/src/check_attr.rs
index 7656ca86c18..1b2b8ac5dd9 100644
--- a/compiler/rustc_passes/src/check_attr.rs
+++ b/compiler/rustc_passes/src/check_attr.rs
@@ -247,7 +247,7 @@ impl<'tcx> CheckAttrVisitor<'tcx> {
                 self.check_coroutine(attr, target);
             }
             [sym::linkage, ..] => self.check_linkage(attr, span, target),
-            [sym::rustc_pub_transparent, ..] => self.check_rustc_pub_transparent( attr.span, span, attrs),
+            [sym::rustc_pub_transparent, ..] => self.check_rustc_pub_transparent(attr.span, span, attrs),
             [
                 // ok
                 sym::allow
@@ -332,6 +332,7 @@ impl<'tcx> CheckAttrVisitor<'tcx> {
 
         self.check_repr(attrs, span, target, item, hir_id);
         self.check_used(attrs, target, span);
+        self.check_rustc_force_inline(hir_id, attrs, span, target);
     }
 
     fn inline_attr_str_error_with_macro_def(&self, hir_id: HirId, attr: &Attribute, sym: &str) {
@@ -2480,6 +2481,45 @@ impl<'tcx> CheckAttrVisitor<'tcx> {
         }
     }
 
+    fn check_rustc_force_inline(
+        &self,
+        hir_id: HirId,
+        attrs: &[Attribute],
+        span: Span,
+        target: Target,
+    ) {
+        let force_inline_attr = attrs.iter().find(|attr| attr.has_name(sym::rustc_force_inline));
+        match (target, force_inline_attr) {
+            (Target::Closure, None) => {
+                let is_coro = matches!(
+                    self.tcx.hir().expect_expr(hir_id).kind,
+                    hir::ExprKind::Closure(hir::Closure {
+                        kind: hir::ClosureKind::Coroutine(..)
+                            | hir::ClosureKind::CoroutineClosure(..),
+                        ..
+                    })
+                );
+                let parent_did = self.tcx.hir().get_parent_item(hir_id).to_def_id();
+                let parent_span = self.tcx.def_span(parent_did);
+                let parent_force_inline_attr =
+                    self.tcx.get_attr(parent_did, sym::rustc_force_inline);
+                if let Some(attr) = parent_force_inline_attr
+                    && is_coro
+                {
+                    self.dcx().emit_err(errors::RustcForceInlineCoro {
+                        attr_span: attr.span,
+                        span: parent_span,
+                    });
+                }
+            }
+            (Target::Fn, _) => (),
+            (_, Some(attr)) => {
+                self.dcx().emit_err(errors::RustcForceInline { attr_span: attr.span, span });
+            }
+            (_, None) => (),
+        }
+    }
+
     /// Checks if `#[autodiff]` is applied to an item other than a function item.
fn check_autodiff(&self, _hir_id: HirId, _attr: &Attribute, span: Span, target: Target) { debug!("check_autodiff"); diff --git a/compiler/rustc_passes/src/errors.rs b/compiler/rustc_passes/src/errors.rs index d95fa5db0ce..13da021c614 100644 --- a/compiler/rustc_passes/src/errors.rs +++ b/compiler/rustc_passes/src/errors.rs @@ -689,6 +689,24 @@ pub(crate) struct RustcPubTransparent { } #[derive(Diagnostic)] +#[diag(passes_rustc_force_inline)] +pub(crate) struct RustcForceInline { + #[primary_span] + pub attr_span: Span, + #[label] + pub span: Span, +} + +#[derive(Diagnostic)] +#[diag(passes_rustc_force_inline_coro)] +pub(crate) struct RustcForceInlineCoro { + #[primary_span] + pub attr_span: Span, + #[label] + pub span: Span, +} + +#[derive(Diagnostic)] #[diag(passes_link_ordinal)] pub(crate) struct LinkOrdinal { #[primary_span] diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index ef1e2c20978..a5bf5e06485 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1731,6 +1731,7 @@ symbols! { rustc_error, rustc_evaluate_where_clauses, rustc_expected_cgu_reuse, + rustc_force_inline, rustc_has_incoherent_inherent_impls, rustc_hidden_type_of_opaques, rustc_if_this_changed, diff --git a/compiler/rustc_trait_selection/messages.ftl b/compiler/rustc_trait_selection/messages.ftl index 1ab89ecde7a..b82bb27eb79 100644 --- a/compiler/rustc_trait_selection/messages.ftl +++ b/compiler/rustc_trait_selection/messages.ftl @@ -251,7 +251,9 @@ trait_selection_no_value_in_rustc_on_unimplemented = this attribute must have a trait_selection_nothing = {""} -trait_selection_oc_cant_coerce = cannot coerce intrinsics to function pointers +trait_selection_oc_cant_coerce_force_inline = + cannot coerce functions which must be inlined to function pointers +trait_selection_oc_cant_coerce_intrinsic = cannot coerce intrinsics to function pointers trait_selection_oc_closure_selfref = closure/coroutine type that references itself trait_selection_oc_const_compat = const not compatible with trait trait_selection_oc_fn_lang_correct_type = {$lang_item_name -> diff --git a/compiler/rustc_trait_selection/src/error_reporting/infer/mod.rs b/compiler/rustc_trait_selection/src/error_reporting/infer/mod.rs index d279590d45a..53300c95fa7 100644 --- a/compiler/rustc_trait_selection/src/error_reporting/infer/mod.rs +++ b/compiler/rustc_trait_selection/src/error_reporting/infer/mod.rs @@ -2294,7 +2294,7 @@ impl<'tcx> ObligationCause<'tcx> { { FailureCode::Error0644 } - TypeError::IntrinsicCast => FailureCode::Error0308, + TypeError::IntrinsicCast | TypeError::ForceInlineCast => FailureCode::Error0308, _ => FailureCode::Error0308, }, } @@ -2360,8 +2360,11 @@ impl<'tcx> ObligationCause<'tcx> { { ObligationCauseFailureCode::ClosureSelfref { span } } + TypeError::ForceInlineCast => { + ObligationCauseFailureCode::CantCoerceForceInline { span, subdiags } + } TypeError::IntrinsicCast => { - ObligationCauseFailureCode::CantCoerce { span, subdiags } + ObligationCauseFailureCode::CantCoerceIntrinsic { span, subdiags } } _ => ObligationCauseFailureCode::Generic { span, subdiags }, }, diff --git a/compiler/rustc_trait_selection/src/errors.rs b/compiler/rustc_trait_selection/src/errors.rs index a8fddff4e4a..53a4e5031c6 100644 --- a/compiler/rustc_trait_selection/src/errors.rs +++ b/compiler/rustc_trait_selection/src/errors.rs @@ -1729,8 +1729,15 @@ pub enum ObligationCauseFailureCode { #[primary_span] span: Span, }, - #[diag(trait_selection_oc_cant_coerce, code = E0308)] - CantCoerce 
{
+    #[diag(trait_selection_oc_cant_coerce_force_inline, code = E0308)]
+    CantCoerceForceInline {
+        #[primary_span]
+        span: Span,
+        #[subdiagnostic]
+        subdiags: Vec<TypeErrorAdditionalDiags>,
+    },
+    #[diag(trait_selection_oc_cant_coerce_intrinsic, code = E0308)]
+    CantCoerceIntrinsic {
         #[primary_span]
         span: Span,
         #[subdiagnostic]
         subdiags: Vec<TypeErrorAdditionalDiags>,
diff --git a/compiler/rustc_type_ir/src/error.rs b/compiler/rustc_type_ir/src/error.rs
index 55671b84dbc..68b11489ae7 100644
--- a/compiler/rustc_type_ir/src/error.rs
+++ b/compiler/rustc_type_ir/src/error.rs
@@ -51,6 +51,9 @@ pub enum TypeError<I: Interner> {
     ConstMismatch(ExpectedFound<I::Const>),
 
     IntrinsicCast,
+    /// `#[rustc_force_inline]` functions must be inlined and must not be codegened independently,
+    /// so casting to a function pointer must be prohibited.
+    ForceInlineCast,
     /// Safe `#[target_feature]` functions are not assignable to safe function pointers.
     TargetFeatureCast(I::DefId),
 }
@@ -83,6 +86,7 @@ impl<I: Interner> TypeError<I> {
             | ProjectionMismatched(_)
             | ExistentialMismatch(_)
             | ConstMismatch(_)
+            | ForceInlineCast
             | IntrinsicCast => true,
         }
     }
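Putting the pieces together, the user-visible contract of the new attribute looks roughly like this. A hedged sketch (nightly-only, since `rustc_force_inline` is an internal attribute gated on `rustc_attrs`; the error text is taken from the diagnostics added above):

```rust
#![feature(rustc_attrs)]

// Accepted: a free function; the optional string is the reason reported
// if the MIR inliner cannot honor the request.
#[rustc_force_inline = "perf-critical helper"]
fn must_inline(x: u32) -> u32 {
    x.wrapping_mul(3)
}

fn main() {
    // Ordinary calls are fine: the ForceInline pass flattens them into the
    // caller, and the validator rejects any optimized MIR where that failed.
    let _ = must_inline(14);

    // Not allowed -- a function that must always be inlined has no
    // independent code for a pointer to refer to:
    //
    //     let f: fn(u32) -> u32 = must_inline;
    //     error[E0308]: cannot coerce functions which must be inlined to
    //                   function pointers
}
```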
