about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--compiler/rustc_codegen_ssa/src/back/symbol_export.rs13
-rw-r--r--compiler/rustc_middle/src/mir/mono.rs75
-rw-r--r--compiler/rustc_middle/src/ty/instance.rs44
3 files changed, 68 insertions, 64 deletions
diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
index f8f7bb2dbc6..76b0a566f8e 100644
--- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
+++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
@@ -93,16 +93,11 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap<S
                 return None;
             }
 
-            // Functions marked with #[inline] are codegened with "internal"
-            // linkage and are not exported unless marked with an extern
-            // indicator
-            if !Instance::mono(tcx, def_id.to_def_id()).def.generates_cgu_internal_copy(tcx)
-                || tcx.codegen_fn_attrs(def_id.to_def_id()).contains_extern_indicator()
-            {
-                Some(def_id)
-            } else {
-                None
+            if Instance::mono(tcx, def_id.into()).def.requires_inline(tcx) {
+                return None;
             }
+
+            if tcx.cross_crate_inlinable(def_id) { None } else { Some(def_id) }
         })
         .map(|def_id| {
             // We won't link right if this symbol is stripped during LTO.
diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs
index d4a9aac3733..de1bbe81cab 100644
--- a/compiler/rustc_middle/src/mir/mono.rs
+++ b/compiler/rustc_middle/src/mir/mono.rs
@@ -20,7 +20,7 @@ use tracing::debug;
 
 use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
 use crate::middle::codegen_fn_attrs::CodegenFnAttrFlags;
-use crate::ty::{GenericArgs, Instance, InstanceKind, SymbolName, TyCtxt};
+use crate::ty::{self, GenericArgs, Instance, InstanceKind, SymbolName, Ty, TyCtxt};
 
 /// Describes how a monomorphization will be instantiated in object files.
 #[derive(PartialEq)]
@@ -54,6 +54,39 @@ pub enum MonoItem<'tcx> {
     GlobalAsm(ItemId),
 }
 
+fn opt_incr_drop_glue_mode<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> InstantiationMode {
+    // Non-ADTs can't have a Drop impl. This case is mostly hit by closures whose captures require
+    // dropping.
+    let ty::Adt(adt_def, _) = ty.kind() else {
+        return InstantiationMode::LocalCopy;
+    };
+
+    // Types that don't have a direct Drop impl, but have fields that require dropping.
+    let Some(dtor) = adt_def.destructor(tcx) else {
+        // We use LocalCopy for drops of enums only; this code is inherited from
+        // https://github.com/rust-lang/rust/pull/67332 and the theory is that we get to optimize
+        // out code like drop_in_place(Option::None) before crate-local ThinLTO, which improves
+        // compile time. At the time of writing, simply removing this entire check does seem to
+        // regress incr-opt compile times. But it sure seems like a more sophisticated check could
+        // do better here.
+        if adt_def.is_enum() {
+            return InstantiationMode::LocalCopy;
+        } else {
+            return InstantiationMode::GloballyShared { may_conflict: true };
+        }
+    };
+
+    // We've gotten to a drop_in_place for a type that directly implements Drop.
+    // The drop glue is a wrapper for the Drop::drop impl, and we are an optimized build, so in an
+    // effort to coordinate with the mode that the actual impl will get, we make the glue also
+    // LocalCopy.
+    if tcx.cross_crate_inlinable(dtor.did) {
+        InstantiationMode::LocalCopy
+    } else {
+        InstantiationMode::GloballyShared { may_conflict: true }
+    }
+}
+
 impl<'tcx> MonoItem<'tcx> {
     /// Returns `true` if the mono item is user-defined (i.e. not compiler-generated, like shims).
     pub fn is_user_defined(&self) -> bool {
@@ -123,16 +156,36 @@ impl<'tcx> MonoItem<'tcx> {
             return InstantiationMode::GloballyShared { may_conflict: false };
         }
 
-        // FIXME: The logic for which functions are permitted to get LocalCopy is actually spread
-        // across 4 functions:
-        // * cross_crate_inlinable(def_id)
-        // * InstanceKind::requires_inline
-        // * InstanceKind::generate_cgu_internal_copy
-        // * MonoItem::instantiation_mode
-        // Since reachable_non_generics calls InstanceKind::generates_cgu_internal_copy to decide
-        // which symbols this crate exports, we are obligated to only generate LocalCopy when
-        // generates_cgu_internal_copy returns true.
-        if !instance.def.generates_cgu_internal_copy(tcx) {
+        // This is technically a heuristic even though it's in the "not a heuristic" part of
+        // instantiation mode selection.
+        // It is surely possible to untangle this; the root problem is that the way we instantiate
+        // InstanceKind other than Item is very complicated.
+        //
+        // The fallback case is to give everything else GloballyShared at OptLevel::No and
+        // LocalCopy at all other opt levels. This is a good default, except for one specific build
+        // configuration: Optimized incremental builds.
+        // In the current compiler architecture there is a fundamental tension between
+        // optimizations (which want big CGUs with as many things LocalCopy as possible) and
+        // incrementality (which wants small CGUs with as many things GloballyShared as possible).
+        // The heuristics implemented here do better than a completely naive approach in the
+        // compiler benchmark suite, but there is no reason to believe they are optimal.
+        if let InstanceKind::DropGlue(_, Some(ty)) = instance.def {
+            if tcx.sess.opts.optimize == OptLevel::No {
+                return InstantiationMode::GloballyShared { may_conflict: false };
+            }
+            if tcx.sess.opts.incremental.is_none() {
+                return InstantiationMode::LocalCopy;
+            }
+            return opt_incr_drop_glue_mode(tcx, ty);
+        }
+
+        // We need to ensure that we do not decide the InstantiationMode of an exported symbol is
+        // LocalCopy. Since exported symbols are computed based on the output of
+        // cross_crate_inlinable, we are beholden to our previous decisions.
+        //
+        // Note that just like above, this check for requires_inline is technically a heuristic
+        // even though it's in the "not a heuristic" part of instantiation mode selection.
+        if !tcx.cross_crate_inlinable(instance.def_id()) && !instance.def.requires_inline(tcx) {
             return InstantiationMode::GloballyShared { may_conflict: false };
         }
 
diff --git a/compiler/rustc_middle/src/ty/instance.rs b/compiler/rustc_middle/src/ty/instance.rs
index b7a648aae3f..c557178096e 100644
--- a/compiler/rustc_middle/src/ty/instance.rs
+++ b/compiler/rustc_middle/src/ty/instance.rs
@@ -301,50 +301,6 @@ impl<'tcx> InstanceKind<'tcx> {
         )
     }
 
-    /// Returns `true` if the machine code for this instance is instantiated in
-    /// each codegen unit that references it.
-    /// Note that this is only a hint! The compiler can globally decide to *not*
-    /// do this in order to speed up compilation. CGU-internal copies are
-    /// only exist to enable inlining. If inlining is not performed (e.g. at
-    /// `-Copt-level=0`) then the time for generating them is wasted and it's
-    /// better to create a single copy with external linkage.
-    pub fn generates_cgu_internal_copy(&self, tcx: TyCtxt<'tcx>) -> bool {
-        if self.requires_inline(tcx) {
-            return true;
-        }
-        if let ty::InstanceKind::DropGlue(.., Some(ty))
-        | ty::InstanceKind::AsyncDropGlueCtorShim(.., Some(ty)) = *self
-        {
-            // Drop glue generally wants to be instantiated at every codegen
-            // unit, but without an #[inline] hint. We should make this
-            // available to normal end-users.
-            if tcx.sess.opts.incremental.is_none() {
-                return true;
-            }
-            // When compiling with incremental, we can generate a *lot* of
-            // codegen units. Including drop glue into all of them has a
-            // considerable compile time cost.
-            //
-            // We include enums without destructors to allow, say, optimizing
-            // drops of `Option::None` before LTO. We also respect the intent of
-            // `#[inline]` on `Drop::drop` implementations.
-            return ty.ty_adt_def().is_none_or(|adt_def| {
-                match *self {
-                    ty::InstanceKind::DropGlue(..) => adt_def.destructor(tcx).map(|dtor| dtor.did),
-                    ty::InstanceKind::AsyncDropGlueCtorShim(..) => {
-                        adt_def.async_destructor(tcx).map(|dtor| dtor.ctor)
-                    }
-                    _ => unreachable!(),
-                }
-                .map_or_else(|| adt_def.is_enum(), |did| tcx.cross_crate_inlinable(did))
-            });
-        }
-        if let ty::InstanceKind::ThreadLocalShim(..) = *self {
-            return false;
-        }
-        tcx.cross_crate_inlinable(self.def_id())
-    }
-
     pub fn requires_caller_location(&self, tcx: TyCtxt<'_>) -> bool {
         match *self {
             InstanceKind::Item(def_id) | InstanceKind::Virtual(def_id, _) => {