about summary refs log tree commit diff
path: root/compiler/rustc_codegen_ssa
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_codegen_ssa')
-rw-r--r--compiler/rustc_codegen_ssa/src/back/link.rs7
-rw-r--r--compiler/rustc_codegen_ssa/src/back/linker.rs33
-rw-r--r--compiler/rustc_codegen_ssa/src/back/lto.rs60
-rw-r--r--compiler/rustc_codegen_ssa/src/back/write.rs174
-rw-r--r--compiler/rustc_codegen_ssa/src/base.rs9
-rw-r--r--compiler/rustc_codegen_ssa/src/meth.rs3
-rw-r--r--compiler/rustc_codegen_ssa/src/mir/debuginfo.rs2
-rw-r--r--compiler/rustc_codegen_ssa/src/mir/mod.rs5
-rw-r--r--compiler/rustc_codegen_ssa/src/mir/naked_asm.rs2
-rw-r--r--compiler/rustc_codegen_ssa/src/mir/rvalue.rs12
-rw-r--r--compiler/rustc_codegen_ssa/src/mono_item.rs10
-rw-r--r--compiler/rustc_codegen_ssa/src/traits/type_.rs6
-rw-r--r--compiler/rustc_codegen_ssa/src/traits/write.rs24
13 files changed, 129 insertions, 218 deletions
diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs
index b46773396fc..5ce301c0eb9 100644
--- a/compiler/rustc_codegen_ssa/src/back/link.rs
+++ b/compiler/rustc_codegen_ssa/src/back/link.rs
@@ -2542,12 +2542,7 @@ fn add_order_independent_options(
         // sections to ensure we have all the data for PGO.
         let keep_metadata =
             crate_type == CrateType::Dylib || sess.opts.cg.profile_generate.enabled();
-        if crate_type != CrateType::Executable || !sess.opts.unstable_opts.export_executable_symbols
-        {
-            cmd.gc_sections(keep_metadata);
-        } else {
-            cmd.no_gc_sections();
-        }
+        cmd.gc_sections(keep_metadata);
     }
 
     cmd.set_output_kind(link_output_kind, crate_type, out_filename);
diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs
index e0a3ad55be0..050797354b4 100644
--- a/compiler/rustc_codegen_ssa/src/back/linker.rs
+++ b/compiler/rustc_codegen_ssa/src/back/linker.rs
@@ -326,7 +326,6 @@ pub(crate) trait Linker {
         link_or_cc_args(self, &[path]);
     }
     fn gc_sections(&mut self, keep_metadata: bool);
-    fn no_gc_sections(&mut self);
     fn full_relro(&mut self);
     fn partial_relro(&mut self);
     fn no_relro(&mut self);
@@ -688,12 +687,6 @@ impl<'a> Linker for GccLinker<'a> {
         }
     }
 
-    fn no_gc_sections(&mut self) {
-        if self.is_gnu || self.sess.target.is_like_wasm {
-            self.link_arg("--no-gc-sections");
-        }
-    }
-
     fn optimize(&mut self) {
         if !self.is_gnu && !self.sess.target.is_like_wasm {
             return;
@@ -1010,10 +1003,6 @@ impl<'a> Linker for MsvcLinker<'a> {
         }
     }
 
-    fn no_gc_sections(&mut self) {
-        self.link_arg("/OPT:NOREF,NOICF");
-    }
-
     fn full_relro(&mut self) {
         // noop
     }
@@ -1243,10 +1232,6 @@ impl<'a> Linker for EmLinker<'a> {
         // noop
     }
 
-    fn no_gc_sections(&mut self) {
-        // noop
-    }
-
     fn optimize(&mut self) {
         // Emscripten performs own optimizations
         self.cc_arg(match self.sess.opts.optimize {
@@ -1418,10 +1403,6 @@ impl<'a> Linker for WasmLd<'a> {
         self.link_arg("--gc-sections");
     }
 
-    fn no_gc_sections(&mut self) {
-        self.link_arg("--no-gc-sections");
-    }
-
     fn optimize(&mut self) {
         // The -O flag is, as of late 2023, only used for merging of strings and debuginfo, and
         // only differentiates -O0 and -O1. It does not apply to LTO.
@@ -1567,10 +1548,6 @@ impl<'a> Linker for L4Bender<'a> {
         }
     }
 
-    fn no_gc_sections(&mut self) {
-        self.link_arg("--no-gc-sections");
-    }
-
     fn optimize(&mut self) {
         // GNU-style linkers support optimization with -O. GNU ld doesn't
         // need a numeric argument, but other linkers do.
@@ -1734,10 +1711,6 @@ impl<'a> Linker for AixLinker<'a> {
         self.link_arg("-bgc");
     }
 
-    fn no_gc_sections(&mut self) {
-        self.link_arg("-bnogc");
-    }
-
     fn optimize(&mut self) {}
 
     fn pgo_gen(&mut self) {
@@ -1982,8 +1955,6 @@ impl<'a> Linker for PtxLinker<'a> {
 
     fn gc_sections(&mut self, _keep_metadata: bool) {}
 
-    fn no_gc_sections(&mut self) {}
-
     fn pgo_gen(&mut self) {}
 
     fn no_crt_objects(&mut self) {}
@@ -2057,8 +2028,6 @@ impl<'a> Linker for LlbcLinker<'a> {
 
     fn gc_sections(&mut self, _keep_metadata: bool) {}
 
-    fn no_gc_sections(&mut self) {}
-
     fn pgo_gen(&mut self) {}
 
     fn no_crt_objects(&mut self) {}
@@ -2139,8 +2108,6 @@ impl<'a> Linker for BpfLinker<'a> {
 
     fn gc_sections(&mut self, _keep_metadata: bool) {}
 
-    fn no_gc_sections(&mut self) {}
-
     fn pgo_gen(&mut self) {}
 
     fn no_crt_objects(&mut self) {}
diff --git a/compiler/rustc_codegen_ssa/src/back/lto.rs b/compiler/rustc_codegen_ssa/src/back/lto.rs
index ce6fe8a191b..b49b6783bbd 100644
--- a/compiler/rustc_codegen_ssa/src/back/lto.rs
+++ b/compiler/rustc_codegen_ssa/src/back/lto.rs
@@ -1,13 +1,8 @@
 use std::ffi::CString;
 use std::sync::Arc;
 
-use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_data_structures::memmap::Mmap;
-use rustc_errors::FatalError;
 
-use super::write::CodegenContext;
-use crate::ModuleCodegen;
-use crate::back::write::ModuleConfig;
 use crate::traits::*;
 
 pub struct ThinModule<B: WriteBackendMethods> {
@@ -42,61 +37,6 @@ pub struct ThinShared<B: WriteBackendMethods> {
     pub module_names: Vec<CString>,
 }
 
-pub enum LtoModuleCodegen<B: WriteBackendMethods> {
-    Fat(ModuleCodegen<B::Module>),
-    Thin(ThinModule<B>),
-}
-
-impl<B: WriteBackendMethods> LtoModuleCodegen<B> {
-    pub fn name(&self) -> &str {
-        match *self {
-            LtoModuleCodegen::Fat(_) => "everything",
-            LtoModuleCodegen::Thin(ref m) => m.name(),
-        }
-    }
-
-    /// Optimize this module within the given codegen context.
-    pub fn optimize(
-        self,
-        cgcx: &CodegenContext<B>,
-    ) -> Result<ModuleCodegen<B::Module>, FatalError> {
-        match self {
-            LtoModuleCodegen::Fat(mut module) => {
-                B::optimize_fat(cgcx, &mut module)?;
-                Ok(module)
-            }
-            LtoModuleCodegen::Thin(thin) => B::optimize_thin(cgcx, thin),
-        }
-    }
-
-    /// A "gauge" of how costly it is to optimize this module, used to sort
-    /// biggest modules first.
-    pub fn cost(&self) -> u64 {
-        match *self {
-            // Only one module with fat LTO, so the cost doesn't matter.
-            LtoModuleCodegen::Fat(_) => 0,
-            LtoModuleCodegen::Thin(ref m) => m.cost(),
-        }
-    }
-
-    /// Run autodiff on Fat LTO module
-    pub fn autodiff(
-        self,
-        cgcx: &CodegenContext<B>,
-        diff_fncs: Vec<AutoDiffItem>,
-        config: &ModuleConfig,
-    ) -> Result<LtoModuleCodegen<B>, FatalError> {
-        match &self {
-            LtoModuleCodegen::Fat(module) => {
-                B::autodiff(cgcx, &module, diff_fncs, config)?;
-            }
-            _ => panic!("autodiff called with non-fat LTO module"),
-        }
-
-        Ok(self)
-    }
-}
-
 pub enum SerializedModule<M: ModuleBufferMethods> {
     Local(M),
     FromRlib(Vec<u8>),
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index d2a64ec2993..50a7cba300b 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -397,50 +397,31 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
     }
 }
 
-fn generate_lto_work<B: ExtraBackendMethods>(
+fn generate_thin_lto_work<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
-    autodiff: Vec<AutoDiffItem>,
-    needs_fat_lto: Vec<FatLtoInput<B>>,
     needs_thin_lto: Vec<(String, B::ThinBuffer)>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
 ) -> Vec<(WorkItem<B>, u64)> {
-    let _prof_timer = cgcx.prof.generic_activity("codegen_generate_lto_work");
-
-    if !needs_fat_lto.is_empty() {
-        assert!(needs_thin_lto.is_empty());
-        let mut module =
-            B::run_fat_lto(cgcx, needs_fat_lto, import_only_modules).unwrap_or_else(|e| e.raise());
-        if cgcx.lto == Lto::Fat && !autodiff.is_empty() {
-            let config = cgcx.config(ModuleKind::Regular);
-            module = module.autodiff(cgcx, autodiff, config).unwrap_or_else(|e| e.raise());
-        }
-        // We are adding a single work item, so the cost doesn't matter.
-        vec![(WorkItem::LTO(module), 0)]
-    } else {
-        if !autodiff.is_empty() {
-            let dcx = cgcx.create_dcx();
-            dcx.handle().emit_fatal(AutodiffWithoutLto {});
-        }
-        assert!(needs_fat_lto.is_empty());
-        let (lto_modules, copy_jobs) = B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules)
-            .unwrap_or_else(|e| e.raise());
-        lto_modules
-            .into_iter()
-            .map(|module| {
-                let cost = module.cost();
-                (WorkItem::LTO(module), cost)
-            })
-            .chain(copy_jobs.into_iter().map(|wp| {
-                (
-                    WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
-                        name: wp.cgu_name.clone(),
-                        source: wp,
-                    }),
-                    0, // copying is very cheap
-                )
-            }))
-            .collect()
-    }
+    let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work");
+
+    let (lto_modules, copy_jobs) =
+        B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules).unwrap_or_else(|e| e.raise());
+    lto_modules
+        .into_iter()
+        .map(|module| {
+            let cost = module.cost();
+            (WorkItem::ThinLto(module), cost)
+        })
+        .chain(copy_jobs.into_iter().map(|wp| {
+            (
+                WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
+                    name: wp.cgu_name.clone(),
+                    source: wp,
+                }),
+                0, // copying is very cheap
+            )
+        }))
+        .collect()
 }
 
 struct CompiledModules {
@@ -470,6 +451,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
     backend: B,
     tcx: TyCtxt<'_>,
     target_cpu: String,
+    autodiff_items: &[AutoDiffItem],
 ) -> OngoingCodegen<B> {
     let (coordinator_send, coordinator_receive) = channel();
 
@@ -488,6 +470,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
         backend.clone(),
         tcx,
         &crate_info,
+        autodiff_items,
         shared_emitter,
         codegen_worker_send,
         coordinator_receive,
@@ -736,15 +719,23 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
     /// Copy the post-LTO artifacts from the incremental cache to the output
     /// directory.
     CopyPostLtoArtifacts(CachedModuleCodegen),
-    /// Performs (Thin)LTO on the given module.
-    LTO(lto::LtoModuleCodegen<B>),
+    /// Performs fat LTO on the given module.
+    FatLto {
+        needs_fat_lto: Vec<FatLtoInput<B>>,
+        import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
+        autodiff: Vec<AutoDiffItem>,
+    },
+    /// Performs thin-LTO on the given module.
+    ThinLto(lto::ThinModule<B>),
 }
 
 impl<B: WriteBackendMethods> WorkItem<B> {
     fn module_kind(&self) -> ModuleKind {
         match *self {
             WorkItem::Optimize(ref m) => m.kind,
-            WorkItem::CopyPostLtoArtifacts(_) | WorkItem::LTO(_) => ModuleKind::Regular,
+            WorkItem::CopyPostLtoArtifacts(_) | WorkItem::FatLto { .. } | WorkItem::ThinLto(_) => {
+                ModuleKind::Regular
+            }
         }
     }
 
@@ -792,7 +783,8 @@ impl<B: WriteBackendMethods> WorkItem<B> {
         match self {
             WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
             WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
-            WorkItem::LTO(m) => desc("lto", "LTO module", m.name()),
+            WorkItem::FatLto { .. } => desc("lto", "fat LTO module", "everything"),
+            WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
         }
     }
 }
@@ -996,12 +988,24 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     })
 }
 
-fn execute_lto_work_item<B: ExtraBackendMethods>(
+fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
+    cgcx: &CodegenContext<B>,
+    needs_fat_lto: Vec<FatLtoInput<B>>,
+    import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
+    autodiff: Vec<AutoDiffItem>,
+    module_config: &ModuleConfig,
+) -> Result<WorkItemResult<B>, FatalError> {
+    let module = B::run_and_optimize_fat_lto(cgcx, needs_fat_lto, import_only_modules, autodiff)?;
+    let module = B::codegen(cgcx, module, module_config)?;
+    Ok(WorkItemResult::Finished(module))
+}
+
+fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
-    module: lto::LtoModuleCodegen<B>,
+    module: lto::ThinModule<B>,
     module_config: &ModuleConfig,
 ) -> Result<WorkItemResult<B>, FatalError> {
-    let module = module.optimize(cgcx)?;
+    let module = B::optimize_thin(cgcx, module)?;
     finish_intra_module_work(cgcx, module, module_config)
 }
 
@@ -1010,11 +1014,8 @@ fn finish_intra_module_work<B: ExtraBackendMethods>(
     module: ModuleCodegen<B::Module>,
     module_config: &ModuleConfig,
 ) -> Result<WorkItemResult<B>, FatalError> {
-    let dcx = cgcx.create_dcx();
-    let dcx = dcx.handle();
-
     if !cgcx.opts.unstable_opts.combine_cgu || module.kind == ModuleKind::Allocator {
-        let module = B::codegen(cgcx, dcx, module, module_config)?;
+        let module = B::codegen(cgcx, module, module_config)?;
         Ok(WorkItemResult::Finished(module))
     } else {
         Ok(WorkItemResult::NeedsLink(module))
@@ -1031,9 +1032,6 @@ pub(crate) enum Message<B: WriteBackendMethods> {
     /// Sent from a backend worker thread.
     WorkItem { result: Result<WorkItemResult<B>, Option<WorkerFatalError>>, worker_id: usize },
 
-    /// A vector containing all the AutoDiff tasks that we have to pass to Enzyme.
-    AddAutoDiffItems(Vec<AutoDiffItem>),
-
     /// The frontend has finished generating something (backend IR or a
     /// post-LTO artifact) for a codegen unit, and it should be passed to the
     /// backend. Sent from the main thread.
@@ -1100,6 +1098,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
     backend: B,
     tcx: TyCtxt<'_>,
     crate_info: &CrateInfo,
+    autodiff_items: &[AutoDiffItem],
     shared_emitter: SharedEmitter,
     codegen_worker_send: Sender<CguMessage>,
     coordinator_receive: Receiver<Box<dyn Any + Send>>,
@@ -1109,6 +1108,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
 ) -> thread::JoinHandle<Result<CompiledModules, ()>> {
     let coordinator_send = tx_to_llvm_workers;
     let sess = tcx.sess;
+    let autodiff_items = autodiff_items.to_vec();
 
     let mut each_linked_rlib_for_lto = Vec::new();
     drop(link::each_linked_rlib(crate_info, None, &mut |cnum, path| {
@@ -1362,7 +1362,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
 
         // This is where we collect codegen units that have gone all the way
         // through codegen and LLVM.
-        let mut autodiff_items = Vec::new();
         let mut compiled_modules = vec![];
         let mut compiled_allocator_module = None;
         let mut needs_link = Vec::new();
@@ -1474,20 +1473,37 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     let needs_thin_lto = mem::take(&mut needs_thin_lto);
                     let import_only_modules = mem::take(&mut lto_import_only_modules);
 
-                    for (work, cost) in generate_lto_work(
-                        &cgcx,
-                        autodiff_items.clone(),
-                        needs_fat_lto,
-                        needs_thin_lto,
-                        import_only_modules,
-                    ) {
-                        let insertion_index = work_items
-                            .binary_search_by_key(&cost, |&(_, cost)| cost)
-                            .unwrap_or_else(|e| e);
-                        work_items.insert(insertion_index, (work, cost));
+                    if !needs_fat_lto.is_empty() {
+                        assert!(needs_thin_lto.is_empty());
+
+                        work_items.push((
+                            WorkItem::FatLto {
+                                needs_fat_lto,
+                                import_only_modules,
+                                autodiff: autodiff_items.clone(),
+                            },
+                            0,
+                        ));
                         if cgcx.parallel {
                             helper.request_token();
                         }
+                    } else {
+                        if !autodiff_items.is_empty() {
+                            let dcx = cgcx.create_dcx();
+                            dcx.handle().emit_fatal(AutodiffWithoutLto {});
+                        }
+
+                        for (work, cost) in
+                            generate_thin_lto_work(&cgcx, needs_thin_lto, import_only_modules)
+                        {
+                            let insertion_index = work_items
+                                .binary_search_by_key(&cost, |&(_, cost)| cost)
+                                .unwrap_or_else(|e| e);
+                            work_items.insert(insertion_index, (work, cost));
+                            if cgcx.parallel {
+                                helper.request_token();
+                            }
+                        }
                     }
                 }
 
@@ -1616,10 +1632,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     main_thread_state = MainThreadState::Idle;
                 }
 
-                Message::AddAutoDiffItems(mut items) => {
-                    autodiff_items.append(&mut items);
-                }
-
                 Message::CodegenComplete => {
                     if codegen_state != Aborted {
                         codegen_state = Completed;
@@ -1702,7 +1714,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
             let dcx = dcx.handle();
             let module = B::run_link(&cgcx, dcx, needs_link).map_err(|_| ())?;
             let module =
-                B::codegen(&cgcx, dcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
+                B::codegen(&cgcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
             compiled_modules.push(module);
         }
 
@@ -1842,10 +1854,22 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
                     );
                     Ok(execute_copy_from_cache_work_item(&cgcx, m, module_config))
                 }
-                WorkItem::LTO(m) => {
+                WorkItem::FatLto { needs_fat_lto, import_only_modules, autodiff } => {
+                    let _timer = cgcx
+                        .prof
+                        .generic_activity_with_arg("codegen_module_perform_lto", "everything");
+                    execute_fat_lto_work_item(
+                        &cgcx,
+                        needs_fat_lto,
+                        import_only_modules,
+                        autodiff,
+                        module_config,
+                    )
+                }
+                WorkItem::ThinLto(m) => {
                     let _timer =
                         cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name());
-                    execute_lto_work_item(&cgcx, m, module_config)
+                    execute_thin_lto_work_item(&cgcx, m, module_config)
                 }
             })
         };
@@ -2082,10 +2106,6 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
         drop(self.coordinator.sender.send(Box::new(Message::CodegenComplete::<B>)));
     }
 
-    pub(crate) fn submit_autodiff_items(&self, items: Vec<AutoDiffItem>) {
-        drop(self.coordinator.sender.send(Box::new(Message::<B>::AddAutoDiffItems(items))));
-    }
-
     pub(crate) fn check_for_errors(&self, sess: &Session) {
         self.shared_emitter_main.check(sess, false);
     }
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 18581f854b6..833456abb8a 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -647,7 +647,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 ) -> OngoingCodegen<B> {
     // Skip crate items and just output metadata in -Z no-codegen mode.
     if tcx.sess.opts.unstable_opts.no_codegen || !tcx.sess.opts.output_types.should_codegen() {
-        let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu);
+        let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu, &[]);
 
         ongoing_codegen.codegen_finished(tcx);
 
@@ -667,7 +667,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
     // codegen units.
     let MonoItemPartitions { codegen_units, autodiff_items, .. } =
         tcx.collect_and_partition_mono_items(());
-    let autodiff_fncs = autodiff_items.to_vec();
 
     // Force all codegen_unit queries so they are already either red or green
     // when compile_codegen_unit accesses them. We are not able to re-execute
@@ -680,7 +679,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         }
     }
 
-    let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu);
+    let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu, autodiff_items);
 
     // Codegen an allocator shim, if necessary.
     if let Some(kind) = allocator_kind_for_codegen(tcx) {
@@ -710,10 +709,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         );
     }
 
-    if !autodiff_fncs.is_empty() {
-        ongoing_codegen.submit_autodiff_items(autodiff_fncs);
-    }
-
     // For better throughput during parallel processing by LLVM, we used to sort
     // CGUs largest to smallest. This would lead to better thread utilization
     // by, for example, preventing a large CGU from being processed last and
diff --git a/compiler/rustc_codegen_ssa/src/meth.rs b/compiler/rustc_codegen_ssa/src/meth.rs
index 2aa5c3c27ea..34ad35a729b 100644
--- a/compiler/rustc_codegen_ssa/src/meth.rs
+++ b/compiler/rustc_codegen_ssa/src/meth.rs
@@ -139,7 +139,8 @@ pub(crate) fn load_vtable<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         && bx.cx().sess().lto() == Lto::Fat
     {
         if let Some(trait_ref) = dyn_trait_in_self(bx.tcx(), ty) {
-            let typeid = bx.typeid_metadata(typeid_for_trait_ref(bx.tcx(), trait_ref)).unwrap();
+            let typeid =
+                bx.typeid_metadata(typeid_for_trait_ref(bx.tcx(), trait_ref).as_bytes()).unwrap();
             let func = bx.type_checked_load(llvtable, vtable_byte_offset, typeid);
             return func;
         } else if nonnull {
diff --git a/compiler/rustc_codegen_ssa/src/mir/debuginfo.rs b/compiler/rustc_codegen_ssa/src/mir/debuginfo.rs
index 025f5fb54f4..b8f635ab781 100644
--- a/compiler/rustc_codegen_ssa/src/mir/debuginfo.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/debuginfo.rs
@@ -356,7 +356,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             LocalRef::Operand(operand) => {
                 // Don't spill operands onto the stack in naked functions.
                 // See: https://github.com/rust-lang/rust/issues/42779
-                let attrs = bx.tcx().codegen_fn_attrs(self.instance.def_id());
+                let attrs = bx.tcx().codegen_instance_attrs(self.instance.def);
                 if attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
                     return;
                 }
diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs
index fa69820d5d2..50d0f910744 100644
--- a/compiler/rustc_codegen_ssa/src/mir/mod.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs
@@ -390,9 +390,8 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
 
     let mut num_untupled = None;
 
-    let codegen_fn_attrs = bx.tcx().codegen_fn_attrs(fx.instance.def_id());
-    let naked = codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED);
-    if naked {
+    let codegen_fn_attrs = bx.tcx().codegen_instance_attrs(fx.instance.def);
+    if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
         return vec![];
     }
 
diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index beaf8950978..42e435cf0a3 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -128,7 +128,7 @@ fn prefix_and_suffix<'tcx>(
     let is_arm = tcx.sess.target.arch == "arm";
     let is_thumb = tcx.sess.unstable_target_features.contains(&sym::thumb_mode);
 
-    let attrs = tcx.codegen_fn_attrs(instance.def_id());
+    let attrs = tcx.codegen_instance_attrs(instance.def);
     let link_section = attrs.link_section.map(|symbol| symbol.as_str().to_string());
 
     // If no alignment is specified, an alignment of 4 bytes is used.
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index e90463aacc8..2587e89417a 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -611,11 +611,19 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                     let fn_abi = bx.fn_abi_of_instance(instance, ty::List::empty());
                     let fn_ty = bx.fn_decl_backend_type(fn_abi);
                     let fn_attrs = if bx.tcx().def_kind(instance.def_id()).has_codegen_attrs() {
-                        Some(bx.tcx().codegen_fn_attrs(instance.def_id()))
+                        Some(bx.tcx().codegen_instance_attrs(instance.def))
                     } else {
                         None
                     };
-                    bx.call(fn_ty, fn_attrs, Some(fn_abi), fn_ptr, &[], None, Some(instance))
+                    bx.call(
+                        fn_ty,
+                        fn_attrs.as_deref(),
+                        Some(fn_abi),
+                        fn_ptr,
+                        &[],
+                        None,
+                        Some(instance),
+                    )
                 } else {
                     bx.get_static(def_id)
                 };
diff --git a/compiler/rustc_codegen_ssa/src/mono_item.rs b/compiler/rustc_codegen_ssa/src/mono_item.rs
index 7b4268abe4b..b9040c330fb 100644
--- a/compiler/rustc_codegen_ssa/src/mono_item.rs
+++ b/compiler/rustc_codegen_ssa/src/mono_item.rs
@@ -41,12 +41,8 @@ impl<'a, 'tcx: 'a> MonoItemExt<'a, 'tcx> for MonoItem<'tcx> {
                 base::codegen_global_asm(cx, item_id);
             }
             MonoItem::Fn(instance) => {
-                if cx
-                    .tcx()
-                    .codegen_fn_attrs(instance.def_id())
-                    .flags
-                    .contains(CodegenFnAttrFlags::NAKED)
-                {
+                let flags = cx.tcx().codegen_instance_attrs(instance.def).flags;
+                if flags.contains(CodegenFnAttrFlags::NAKED) {
                     naked_asm::codegen_naked_asm::<Bx::CodegenCx>(cx, instance, item_data);
                 } else {
                     base::codegen_instance::<Bx>(cx, instance);
@@ -75,7 +71,7 @@ impl<'a, 'tcx: 'a> MonoItemExt<'a, 'tcx> for MonoItem<'tcx> {
                 cx.predefine_static(def_id, linkage, visibility, symbol_name);
             }
             MonoItem::Fn(instance) => {
-                let attrs = cx.tcx().codegen_fn_attrs(instance.def_id());
+                let attrs = cx.tcx().codegen_instance_attrs(instance.def);
 
                 if attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
                     // do not define this function; it will become a global assembly block
diff --git a/compiler/rustc_codegen_ssa/src/traits/type_.rs b/compiler/rustc_codegen_ssa/src/traits/type_.rs
index dcd9e25b2c9..32c24965e1b 100644
--- a/compiler/rustc_codegen_ssa/src/traits/type_.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/type_.rs
@@ -154,9 +154,9 @@ pub trait LayoutTypeCodegenMethods<'tcx>: BackendTypes {
 // For backends that support CFI using type membership (i.e., testing whether a given pointer is
 // associated with a type identifier).
 pub trait TypeMembershipCodegenMethods<'tcx>: BackendTypes {
-    fn add_type_metadata(&self, _function: Self::Function, _typeid: String) {}
-    fn set_type_metadata(&self, _function: Self::Function, _typeid: String) {}
-    fn typeid_metadata(&self, _typeid: String) -> Option<Self::Metadata> {
+    fn add_type_metadata(&self, _function: Self::Function, _typeid: &[u8]) {}
+    fn set_type_metadata(&self, _function: Self::Function, _typeid: &[u8]) {}
+    fn typeid_metadata(&self, _typeid: &[u8]) -> Option<Self::Metadata> {
         None
     }
     fn add_kcfi_type_metadata(&self, _function: Self::Function, _typeid: u32) {}
diff --git a/compiler/rustc_codegen_ssa/src/traits/write.rs b/compiler/rustc_codegen_ssa/src/traits/write.rs
index 07a0609fda1..5e993640472 100644
--- a/compiler/rustc_codegen_ssa/src/traits/write.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/write.rs
@@ -2,7 +2,7 @@ use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_errors::{DiagCtxtHandle, FatalError};
 use rustc_middle::dep_graph::WorkProduct;
 
-use crate::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
+use crate::back::lto::{SerializedModule, ThinModule};
 use crate::back::write::{CodegenContext, FatLtoInput, ModuleConfig};
 use crate::{CompiledModule, ModuleCodegen};
 
@@ -20,13 +20,14 @@ pub trait WriteBackendMethods: Clone + 'static {
         dcx: DiagCtxtHandle<'_>,
         modules: Vec<ModuleCodegen<Self::Module>>,
     ) -> Result<ModuleCodegen<Self::Module>, FatalError>;
-    /// Performs fat LTO by merging all modules into a single one and returning it
-    /// for further optimization.
-    fn run_fat_lto(
+    /// Performs fat LTO by merging all modules into a single one, running autodiff
+    /// if necessary and running any further optimizations
+    fn run_and_optimize_fat_lto(
         cgcx: &CodegenContext<Self>,
         modules: Vec<FatLtoInput<Self>>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
-    ) -> Result<LtoModuleCodegen<Self>, FatalError>;
+        diff_fncs: Vec<AutoDiffItem>,
+    ) -> Result<ModuleCodegen<Self::Module>, FatalError>;
     /// Performs thin LTO by performing necessary global analysis and returning two
     /// lists, one of the modules that need optimization and another for modules that
     /// can simply be copied over from the incr. comp. cache.
@@ -34,7 +35,7 @@ pub trait WriteBackendMethods: Clone + 'static {
         cgcx: &CodegenContext<Self>,
         modules: Vec<(String, Self::ThinBuffer)>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
-    ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError>;
+    ) -> Result<(Vec<ThinModule<Self>>, Vec<WorkProduct>), FatalError>;
     fn print_pass_timings(&self);
     fn print_statistics(&self);
     fn optimize(
@@ -43,17 +44,12 @@ pub trait WriteBackendMethods: Clone + 'static {
         module: &mut ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> Result<(), FatalError>;
-    fn optimize_fat(
-        cgcx: &CodegenContext<Self>,
-        llmod: &mut ModuleCodegen<Self::Module>,
-    ) -> Result<(), FatalError>;
     fn optimize_thin(
         cgcx: &CodegenContext<Self>,
         thin: ThinModule<Self>,
     ) -> Result<ModuleCodegen<Self::Module>, FatalError>;
     fn codegen(
         cgcx: &CodegenContext<Self>,
-        dcx: DiagCtxtHandle<'_>,
         module: ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> Result<CompiledModule, FatalError>;
@@ -62,12 +58,6 @@ pub trait WriteBackendMethods: Clone + 'static {
         want_summary: bool,
     ) -> (String, Self::ThinBuffer);
     fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer);
-    fn autodiff(
-        cgcx: &CodegenContext<Self>,
-        module: &ModuleCodegen<Self::Module>,
-        diff_fncs: Vec<AutoDiffItem>,
-        config: &ModuleConfig,
-    ) -> Result<(), FatalError>;
 }
 
 pub trait ThinBufferMethods: Send + Sync {