author    Folkert de Vries <flokkievids@gmail.com>  2025-08-03 10:44:41 +0000
committer GitHub <noreply@github.com>  2025-08-03 10:44:41 +0000
commit    c691374cc19d583103f3da5d655efb308a1648b9 (patch)
tree      767496caef5b54e56f46a834d0d612705ce5e488 /compiler/rustc_codegen_ssa
parent    daa742afe5970109c1e15b391226f78087b10439 (diff)
parent    49aa0ecc7b251003e61c38a56471195a2d3dabee (diff)
Merge pull request #1889 from rust-lang/rustc-pull
Rustc pull update
Diffstat (limited to 'compiler/rustc_codegen_ssa')
 compiler/rustc_codegen_ssa/Cargo.toml                 |   4
 compiler/rustc_codegen_ssa/messages.ftl               |  10
 compiler/rustc_codegen_ssa/src/back/link.rs           |  12
 compiler/rustc_codegen_ssa/src/back/link/raw_dylib.rs | 134
 compiler/rustc_codegen_ssa/src/back/lto.rs            |  91
 compiler/rustc_codegen_ssa/src/back/symbol_export.rs  |   2
 compiler/rustc_codegen_ssa/src/back/write.rs          | 267
 compiler/rustc_codegen_ssa/src/base.rs                |  16
 compiler/rustc_codegen_ssa/src/codegen_attrs.rs       | 551
 compiler/rustc_codegen_ssa/src/common.rs              |   2
 compiler/rustc_codegen_ssa/src/errors.rs              |  17
 compiler/rustc_codegen_ssa/src/mir/analyze.rs         | 118
 compiler/rustc_codegen_ssa/src/mir/constant.rs        |   2
 compiler/rustc_codegen_ssa/src/mir/operand.rs         |  46
 compiler/rustc_codegen_ssa/src/mir/rvalue.rs          | 178
 compiler/rustc_codegen_ssa/src/traits/write.rs        |  13
 16 files changed, 815 insertions(+), 648 deletions(-)
diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml
index cfae1b3ec98..94501da69a7 100644
--- a/compiler/rustc_codegen_ssa/Cargo.toml
+++ b/compiler/rustc_codegen_ssa/Cargo.toml
@@ -8,8 +8,8 @@ edition = "2024"
 ar_archive_writer = "0.4.2"
 bitflags = "2.4.1"
 bstr = "1.11.3"
-# Pinned so `cargo update` bumps don't cause breakage. Please also update the
-# `cc` in `rustc_llvm` if you update the `cc` here.
+# `cc` updates often break things, so we pin it here. Cargo enforces "max 1 semver-compat version
+# per crate", so if you change this, you need to also change it in `rustc_llvm`.
 cc = "=1.2.16"
 itertools = "0.12"
 pathdiff = "0.2.0"
diff --git a/compiler/rustc_codegen_ssa/messages.ftl b/compiler/rustc_codegen_ssa/messages.ftl
index c7bd6ffd1f2..a70d0011d16 100644
--- a/compiler/rustc_codegen_ssa/messages.ftl
+++ b/compiler/rustc_codegen_ssa/messages.ftl
@@ -35,6 +35,10 @@ codegen_ssa_dlltool_fail_import_library =
     {$stdout}
     {$stderr}
 
+codegen_ssa_dynamic_linking_with_lto =
+    cannot prefer dynamic linking when performing LTO
+    .note = only 'staticlib', 'bin', and 'cdylib' outputs are supported with LTO
+
 codegen_ssa_error_calling_dlltool =
     Error calling dlltool '{$dlltool_path}': {$error}
 
@@ -191,6 +195,12 @@ codegen_ssa_linker_unsupported_modifier = `as-needed` modifier not supported for
 
 codegen_ssa_linking_failed = linking with `{$linker_path}` failed: {$exit_status}
 
+codegen_ssa_lto_disallowed = lto can only be run for executables, cdylibs and static library outputs
+
+codegen_ssa_lto_dylib = lto cannot be used for `dylib` crate type without `-Zdylib-lto`
+
+codegen_ssa_lto_proc_macro = lto cannot be used for `proc-macro` crate type without `-Zdylib-lto`
+
 codegen_ssa_malformed_cgu_name =
     found malformed codegen unit name `{$user_path}`. codegen units names must always start with the name of the crate (`{$crate_name}` in this case).
 
diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs
index 5ce301c0eb9..162fbf3d6e2 100644
--- a/compiler/rustc_codegen_ssa/src/back/link.rs
+++ b/compiler/rustc_codegen_ssa/src/back/link.rs
@@ -3369,12 +3369,12 @@ fn warn_if_linked_with_gold(sess: &Session, path: &Path) -> Result<(), Box<dyn s
 
         let section =
             elf.sections(endian, data)?.section_by_name(endian, b".note.gnu.gold-version");
-        if let Some((_, section)) = section {
-            if let Some(mut notes) = section.notes(endian, data)? {
-                return Ok(notes.any(|note| {
-                    note.is_ok_and(|note| note.n_type(endian) == elf::NT_GNU_GOLD_VERSION)
-                }));
-            }
+        if let Some((_, section)) = section
+            && let Some(mut notes) = section.notes(endian, data)?
+        {
+            return Ok(notes.any(|note| {
+                note.is_ok_and(|note| note.n_type(endian) == elf::NT_GNU_GOLD_VERSION)
+            }));
         }
 
         Ok(false)
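
The hunk above is a pure style refactor: two nested `if let` blocks are flattened into a single let-chain, stable as of the 2024 edition. A minimal standalone sketch of the same transformation (the `first_even` function is hypothetical, not from the patch):

```rust
// Nested `if let`s flattened into a let-chain, mirroring the refactor above.
// Requires the 2024 edition (let-chains in `if` conditions).
fn first_even(xs: &[Option<i32>]) -> Option<i32> {
    for x in xs {
        // Before: if let Some(n) = *x { if n % 2 == 0 { return Some(n); } }
        if let Some(n) = *x
            && n % 2 == 0
        {
            return Some(n);
        }
    }
    None
}

fn main() {
    assert_eq!(first_even(&[None, Some(3), Some(4)]), Some(4));
}
```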
diff --git a/compiler/rustc_codegen_ssa/src/back/link/raw_dylib.rs b/compiler/rustc_codegen_ssa/src/back/link/raw_dylib.rs
index 74f39022afb..b9e0c957363 100644
--- a/compiler/rustc_codegen_ssa/src/back/link/raw_dylib.rs
+++ b/compiler/rustc_codegen_ssa/src/back/link/raw_dylib.rs
@@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
 
 use rustc_abi::Endian;
 use rustc_data_structures::base_n::{CASE_INSENSITIVE, ToBaseN};
-use rustc_data_structures::fx::FxIndexMap;
+use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
 use rustc_data_structures::stable_hasher::StableHasher;
 use rustc_hashes::Hash128;
 use rustc_session::Session;
@@ -214,7 +214,7 @@ pub(super) fn create_raw_dylib_elf_stub_shared_objects<'a>(
 /// It exports all the provided symbols, but is otherwise empty.
 fn create_elf_raw_dylib_stub(sess: &Session, soname: &str, symbols: &[DllImport]) -> Vec<u8> {
     use object::write::elf as write;
-    use object::{Architecture, elf};
+    use object::{AddressSize, Architecture, elf};
 
     let mut stub_buf = Vec::new();
 
@@ -226,54 +226,94 @@ fn create_elf_raw_dylib_stub(sess: &Session, soname: &str, symbols: &[DllImport]
     // It is important that the order of reservation matches the order of writing.
     // The object crate contains many debug asserts that fire if you get this wrong.
 
+    let Some((arch, sub_arch)) = sess.target.object_architecture(&sess.unstable_target_features)
+    else {
+        sess.dcx().fatal(format!(
+            "raw-dylib is not supported for the architecture `{}`",
+            sess.target.arch
+        ));
+    };
+
     let endianness = match sess.target.options.endian {
         Endian::Little => object::Endianness::Little,
         Endian::Big => object::Endianness::Big,
     };
-    let mut stub = write::Writer::new(endianness, true, &mut stub_buf);
+
+    let is_64 = match arch.address_size() {
+        Some(AddressSize::U8 | AddressSize::U16 | AddressSize::U32) => false,
+        Some(AddressSize::U64) => true,
+        _ => sess.dcx().fatal(format!(
+            "raw-dylib is not supported for the architecture `{}`",
+            sess.target.arch
+        )),
+    };
+
+    let mut stub = write::Writer::new(endianness, is_64, &mut stub_buf);
+
+    let mut vers = Vec::new();
+    let mut vers_map = FxHashMap::default();
+    let mut syms = Vec::new();
+
+    for symbol in symbols {
+        let symbol_name = symbol.name.as_str();
+        if let Some((name, version_name)) = symbol_name.split_once('@') {
+            assert!(!version_name.contains('@'));
+            let dynstr = stub.add_dynamic_string(name.as_bytes());
+            let ver = if let Some(&ver_id) = vers_map.get(version_name) {
+                ver_id
+            } else {
+                let id = vers.len();
+                vers_map.insert(version_name, id);
+                let dynstr = stub.add_dynamic_string(version_name.as_bytes());
+                vers.push((version_name, dynstr));
+                id
+            };
+            syms.push((name, dynstr, Some(ver)));
+        } else {
+            let dynstr = stub.add_dynamic_string(symbol_name.as_bytes());
+            syms.push((symbol_name, dynstr, None));
+        }
+    }
+
+    let soname = stub.add_dynamic_string(soname.as_bytes());
 
     // These initial reservations don't reserve any bytes in the binary yet,
     // they just allocate in the internal data structures.
 
-    // First, we crate the dynamic symbol table. It starts with a null symbol
+    // First, we create the dynamic symbol table. It starts with a null symbol
     // and then all the symbols and their dynamic strings.
     stub.reserve_null_dynamic_symbol_index();
 
-    let dynstrs = symbols
-        .iter()
-        .map(|sym| {
-            stub.reserve_dynamic_symbol_index();
-            (sym, stub.add_dynamic_string(sym.name.as_str().as_bytes()))
-        })
-        .collect::<Vec<_>>();
-
-    let soname = stub.add_dynamic_string(soname.as_bytes());
+    for _ in syms.iter() {
+        stub.reserve_dynamic_symbol_index();
+    }
 
     // Reserve the sections.
     // We have the minimal sections for a dynamic SO and .text where we point our dummy symbols to.
     stub.reserve_shstrtab_section_index();
     let text_section_name = stub.add_section_name(".text".as_bytes());
     let text_section = stub.reserve_section_index();
-    stub.reserve_dynstr_section_index();
     stub.reserve_dynsym_section_index();
+    stub.reserve_dynstr_section_index();
+    if !vers.is_empty() {
+        stub.reserve_gnu_versym_section_index();
+        stub.reserve_gnu_verdef_section_index();
+    }
     stub.reserve_dynamic_section_index();
 
     // These reservations now determine the actual layout order of the object file.
     stub.reserve_file_header();
     stub.reserve_shstrtab();
     stub.reserve_section_headers();
-    stub.reserve_dynstr();
     stub.reserve_dynsym();
+    stub.reserve_dynstr();
+    if !vers.is_empty() {
+        stub.reserve_gnu_versym();
+        stub.reserve_gnu_verdef(1 + vers.len(), 1 + vers.len());
+    }
     stub.reserve_dynamic(2); // DT_SONAME, DT_NULL
 
     // First write the ELF header with the arch information.
-    let Some((arch, sub_arch)) = sess.target.object_architecture(&sess.unstable_target_features)
-    else {
-        sess.dcx().fatal(format!(
-            "raw-dylib is not supported for the architecture `{}`",
-            sess.target.arch
-        ));
-    };
     let e_machine = match (arch, sub_arch) {
         (Architecture::Aarch64, None) => elf::EM_AARCH64,
         (Architecture::Aarch64_Ilp32, None) => elf::EM_AARCH64,
@@ -342,18 +382,19 @@ fn create_elf_raw_dylib_stub(sess: &Session, soname: &str, symbols: &[DllImport]
         sh_addralign: 1,
         sh_entsize: 0,
     });
-    stub.write_dynstr_section_header(0);
     stub.write_dynsym_section_header(0, 1);
+    stub.write_dynstr_section_header(0);
+    if !vers.is_empty() {
+        stub.write_gnu_versym_section_header(0);
+        stub.write_gnu_verdef_section_header(0);
+    }
     stub.write_dynamic_section_header(0);
 
-    // .dynstr
-    stub.write_dynstr();
-
     // .dynsym
     stub.write_null_dynamic_symbol();
-    for (_, name) in dynstrs {
+    for (_name, dynstr, _ver) in syms.iter().copied() {
         stub.write_dynamic_symbol(&write::Sym {
-            name: Some(name),
+            name: Some(dynstr),
             st_info: (elf::STB_GLOBAL << 4) | elf::STT_NOTYPE,
             st_other: elf::STV_DEFAULT,
             section: Some(text_section),
@@ -363,10 +404,47 @@ fn create_elf_raw_dylib_stub(sess: &Session, soname: &str, symbols: &[DllImport]
         });
     }
 
+    // .dynstr
+    stub.write_dynstr();
+
+    // ld.bfd is unhappy if these sections exist without any symbols, so we only generate them when necessary.
+    if !vers.is_empty() {
+        // .gnu_version
+        stub.write_null_gnu_versym();
+        for (_name, _dynstr, ver) in syms.iter().copied() {
+            stub.write_gnu_versym(if let Some(ver) = ver {
+                assert!((2 + ver as u16) < elf::VERSYM_HIDDEN);
+                elf::VERSYM_HIDDEN | (2 + ver as u16)
+            } else {
+                1
+            });
+        }
+
+        // .gnu_version_d
+        stub.write_align_gnu_verdef();
+        stub.write_gnu_verdef(&write::Verdef {
+            version: elf::VER_DEF_CURRENT,
+            flags: elf::VER_FLG_BASE,
+            index: 1,
+            aux_count: 1,
+            name: soname,
+        });
+        for (ver, (_name, dynstr)) in vers.into_iter().enumerate() {
+            stub.write_gnu_verdef(&write::Verdef {
+                version: elf::VER_DEF_CURRENT,
+                flags: 0,
+                index: 2 + ver as u16,
+                aux_count: 1,
+                name: dynstr,
+            });
+        }
+    }
+
     // .dynamic
     // the DT_SONAME will be used by the linker to populate DT_NEEDED
     // which the loader uses to find the library.
     // DT_NULL terminates the .dynamic table.
+    stub.write_align_dynamic();
     stub.write_dynamic_string(elf::DT_SONAME, soname);
     stub.write_dynamic(elf::DT_NULL, 0);
 
diff --git a/compiler/rustc_codegen_ssa/src/back/lto.rs b/compiler/rustc_codegen_ssa/src/back/lto.rs
index b49b6783bbd..c95038375a1 100644
--- a/compiler/rustc_codegen_ssa/src/back/lto.rs
+++ b/compiler/rustc_codegen_ssa/src/back/lto.rs
@@ -2,7 +2,15 @@ use std::ffi::CString;
 use std::sync::Arc;
 
 use rustc_data_structures::memmap::Mmap;
+use rustc_hir::def_id::{CrateNum, LOCAL_CRATE};
+use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportLevel};
+use rustc_middle::ty::TyCtxt;
+use rustc_session::config::{CrateType, Lto};
+use tracing::info;
 
+use crate::back::symbol_export::{self, symbol_name_for_instance_in_crate};
+use crate::back::write::CodegenContext;
+use crate::errors::{DynamicLinkingWithLTO, LtoDisallowed, LtoDylib, LtoProcMacro};
 use crate::traits::*;
 
 pub struct ThinModule<B: WriteBackendMethods> {
@@ -52,3 +60,86 @@ impl<M: ModuleBufferMethods> SerializedModule<M> {
         }
     }
 }
+
+fn crate_type_allows_lto(crate_type: CrateType) -> bool {
+    match crate_type {
+        CrateType::Executable
+        | CrateType::Dylib
+        | CrateType::Staticlib
+        | CrateType::Cdylib
+        | CrateType::ProcMacro
+        | CrateType::Sdylib => true,
+        CrateType::Rlib => false,
+    }
+}
+
+pub(super) fn exported_symbols_for_lto(
+    tcx: TyCtxt<'_>,
+    each_linked_rlib_for_lto: &[CrateNum],
+) -> Vec<String> {
+    let export_threshold = match tcx.sess.lto() {
+        // We're just doing LTO for our one crate
+        Lto::ThinLocal => SymbolExportLevel::Rust,
+
+        // We're doing LTO for the entire crate graph
+        Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&tcx.crate_types()),
+
+        Lto::No => return vec![],
+    };
+
+    let copy_symbols = |cnum| {
+        tcx.exported_non_generic_symbols(cnum)
+            .iter()
+            .chain(tcx.exported_generic_symbols(cnum))
+            .filter_map(|&(s, info): &(ExportedSymbol<'_>, SymbolExportInfo)| {
+                if info.level.is_below_threshold(export_threshold) || info.used {
+                    Some(symbol_name_for_instance_in_crate(tcx, s, cnum))
+                } else {
+                    None
+                }
+            })
+            .collect::<Vec<_>>()
+    };
+    let mut symbols_below_threshold = {
+        let _timer = tcx.prof.generic_activity("lto_generate_symbols_below_threshold");
+        copy_symbols(LOCAL_CRATE)
+    };
+    info!("{} symbols to preserve in this crate", symbols_below_threshold.len());
+
+    // If we're performing LTO for the entire crate graph, then for each of our
+    // upstream dependencies, include their exported symbols.
+    if tcx.sess.lto() != Lto::ThinLocal {
+        for &cnum in each_linked_rlib_for_lto {
+            let _timer = tcx.prof.generic_activity("lto_generate_symbols_below_threshold");
+            symbols_below_threshold.extend(copy_symbols(cnum));
+        }
+    }
+
+    symbols_below_threshold
+}
+
+pub(super) fn check_lto_allowed<B: WriteBackendMethods>(cgcx: &CodegenContext<B>) {
+    if cgcx.lto == Lto::ThinLocal {
+        // Crate local LTO is always allowed
+        return;
+    }
+
+    let dcx = cgcx.create_dcx();
+
+    // Make sure we actually can run LTO
+    for crate_type in cgcx.crate_types.iter() {
+        if !crate_type_allows_lto(*crate_type) {
+            dcx.handle().emit_fatal(LtoDisallowed);
+        } else if *crate_type == CrateType::Dylib {
+            if !cgcx.opts.unstable_opts.dylib_lto {
+                dcx.handle().emit_fatal(LtoDylib);
+            }
+        } else if *crate_type == CrateType::ProcMacro && !cgcx.opts.unstable_opts.dylib_lto {
+            dcx.handle().emit_fatal(LtoProcMacro);
+        }
+    }
+
+    if cgcx.opts.cg.prefer_dynamic && !cgcx.opts.unstable_opts.dylib_lto {
+        dcx.handle().emit_fatal(DynamicLinkingWithLTO);
+    }
+}
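
The new `check_lto_allowed` centralizes the crate-type gating for LTO in the SSA backend. A simplified, self-contained sketch of the same policy (the `CrateType` enum here is a local stand-in; rustc's real enum also has an `Sdylib` variant):

```rust
// Local stand-ins for rustc's CrateType and the LTO gate above.
#[derive(Clone, Copy)]
enum CrateType { Executable, Dylib, Staticlib, Cdylib, ProcMacro, Rlib }

fn crate_type_allows_lto(crate_type: CrateType) -> bool {
    // Rlibs are linked into a downstream crate, which performs LTO itself.
    !matches!(crate_type, CrateType::Rlib)
}

fn check_lto_allowed(crate_types: &[CrateType], dylib_lto: bool) -> Result<(), &'static str> {
    for &ct in crate_types {
        if !crate_type_allows_lto(ct) {
            return Err("lto can only be run for executables, cdylibs and static library outputs");
        }
        // `dylib` and `proc-macro` crates need the unstable -Zdylib-lto flag.
        if matches!(ct, CrateType::Dylib | CrateType::ProcMacro) && !dylib_lto {
            return Err("lto for this crate type requires -Zdylib-lto");
        }
    }
    Ok(())
}

fn main() {
    assert!(check_lto_allowed(&[CrateType::Executable, CrateType::Staticlib], false).is_ok());
    assert!(check_lto_allowed(&[CrateType::Cdylib], false).is_ok());
    assert!(check_lto_allowed(&[CrateType::Rlib], false).is_err());
    assert!(check_lto_allowed(&[CrateType::Dylib], true).is_ok());
}
```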
diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
index 2f5eca2d6b2..297bdec2bc2 100644
--- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
+++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
@@ -86,7 +86,7 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap<S
             // Only consider nodes that actually have exported symbols.
             match tcx.def_kind(def_id) {
                 DefKind::Fn | DefKind::Static { .. } => {}
-                DefKind::AssocFn if tcx.impl_of_method(def_id.to_def_id()).is_some() => {}
+                DefKind::AssocFn if tcx.impl_of_assoc(def_id.to_def_id()).is_some() => {}
                 _ => return None,
             };
 
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 24e0a4eb533..6773d3e24e9 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -1,4 +1,3 @@
-use std::any::Any;
 use std::assert_matches::assert_matches;
 use std::marker::PhantomData;
 use std::path::{Path, PathBuf};
@@ -9,7 +8,7 @@ use std::{fs, io, mem, str, thread};
 use rustc_abi::Size;
 use rustc_ast::attr;
 use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
-use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
+use rustc_data_structures::fx::FxIndexMap;
 use rustc_data_structures::jobserver::{self, Acquired};
 use rustc_data_structures::memmap::Mmap;
 use rustc_data_structures::profiling::{SelfProfilerRef, VerboseTimingGuard};
@@ -20,14 +19,12 @@ use rustc_errors::{
     Suggestions,
 };
 use rustc_fs_util::link_or_copy;
-use rustc_hir::def_id::{CrateNum, LOCAL_CRATE};
 use rustc_incremental::{
     copy_cgu_workproduct_to_incr_comp_cache_dir, in_incr_comp_dir, in_incr_comp_dir_sess,
 };
 use rustc_metadata::fs::copy_to_stdout;
 use rustc_middle::bug;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
-use rustc_middle::middle::exported_symbols::SymbolExportInfo;
 use rustc_middle::ty::TyCtxt;
 use rustc_session::Session;
 use rustc_session::config::{
@@ -40,7 +37,7 @@ use tracing::debug;
 
 use super::link::{self, ensure_removed};
 use super::lto::{self, SerializedModule};
-use super::symbol_export::symbol_name_for_instance_in_crate;
+use crate::back::lto::check_lto_allowed;
 use crate::errors::{AutodiffWithoutLto, ErrorCreatingRemarkDir};
 use crate::traits::*;
 use crate::{
@@ -332,8 +329,6 @@ pub type TargetMachineFactoryFn<B> = Arc<
         + Sync,
 >;
 
-type ExportedSymbols = FxHashMap<CrateNum, Arc<Vec<(String, SymbolExportInfo)>>>;
-
 /// Additional resources used by optimize_and_codegen (not module specific)
 #[derive(Clone)]
 pub struct CodegenContext<B: WriteBackendMethods> {
@@ -343,10 +338,8 @@ pub struct CodegenContext<B: WriteBackendMethods> {
     pub save_temps: bool,
     pub fewer_names: bool,
     pub time_trace: bool,
-    pub exported_symbols: Option<Arc<ExportedSymbols>>,
     pub opts: Arc<config::Options>,
     pub crate_types: Vec<CrateType>,
-    pub each_linked_rlib_for_lto: Vec<(CrateNum, PathBuf)>,
     pub output_filenames: Arc<OutputFilenames>,
     pub invocation_temp: Option<String>,
     pub regular_module_config: Arc<ModuleConfig>,
@@ -378,8 +371,6 @@ pub struct CodegenContext<B: WriteBackendMethods> {
     /// The incremental compilation session directory, or None if we are not
     /// compiling incrementally
     pub incr_comp_session_dir: Option<PathBuf>,
-    /// Channel back to the main control thread to send messages to
-    pub coordinator_send: Sender<Box<dyn Any + Send>>,
     /// `true` if the codegen should be run in parallel.
     ///
     /// Depends on [`ExtraBackendMethods::supports_parallel()`] and `-Zno_parallel_backend`.
@@ -401,13 +392,21 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
 
 fn generate_thin_lto_work<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
+    exported_symbols_for_lto: &[String],
+    each_linked_rlib_for_lto: &[PathBuf],
     needs_thin_lto: Vec<(String, B::ThinBuffer)>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
 ) -> Vec<(WorkItem<B>, u64)> {
     let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work");
 
-    let (lto_modules, copy_jobs) =
-        B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules).unwrap_or_else(|e| e.raise());
+    let (lto_modules, copy_jobs) = B::run_thin_lto(
+        cgcx,
+        exported_symbols_for_lto,
+        each_linked_rlib_for_lto,
+        needs_thin_lto,
+        import_only_modules,
+    )
+    .unwrap_or_else(|e| e.raise());
     lto_modules
         .into_iter()
         .map(|module| {
@@ -723,6 +722,8 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
     CopyPostLtoArtifacts(CachedModuleCodegen),
     /// Performs fat LTO on the given module.
     FatLto {
+        exported_symbols_for_lto: Arc<Vec<String>>,
+        each_linked_rlib_for_lto: Vec<PathBuf>,
         needs_fat_lto: Vec<FatLtoInput<B>>,
         import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
         autodiff: Vec<AutoDiffItem>,
@@ -796,10 +797,6 @@ pub(crate) enum WorkItemResult<B: WriteBackendMethods> {
     /// The backend has finished compiling a CGU, nothing more required.
     Finished(CompiledModule),
 
-    /// The backend has finished compiling a CGU, which now needs linking
-    /// because `-Zcombine-cgu` was specified.
-    NeedsLink(ModuleCodegen<B::Module>),
-
     /// The backend has finished compiling a CGU, which now needs to go through
     /// fat LTO.
     NeedsFatLto(FatLtoInput<B>),
@@ -810,7 +807,7 @@ pub(crate) enum WorkItemResult<B: WriteBackendMethods> {
 }
 
 pub enum FatLtoInput<B: WriteBackendMethods> {
-    Serialized { name: String, buffer: B::ModuleBuffer },
+    Serialized { name: String, buffer: SerializedModule<B::ModuleBuffer> },
     InMemory(ModuleCodegen<B::Module>),
 }
 
@@ -883,7 +880,10 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
     };
 
     match lto_type {
-        ComputedLtoType::No => finish_intra_module_work(cgcx, module, module_config),
+        ComputedLtoType::No => {
+            let module = B::codegen(cgcx, module, module_config)?;
+            Ok(WorkItemResult::Finished(module))
+        }
         ComputedLtoType::Thin => {
             let (name, thin_buffer) = B::prepare_thin(module, false);
             if let Some(path) = bitcode {
@@ -899,7 +899,10 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
                 fs::write(&path, buffer.data()).unwrap_or_else(|e| {
                     panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e);
                 });
-                Ok(WorkItemResult::NeedsFatLto(FatLtoInput::Serialized { name, buffer }))
+                Ok(WorkItemResult::NeedsFatLto(FatLtoInput::Serialized {
+                    name,
+                    buffer: SerializedModule::Local(buffer),
+                }))
             }
             None => Ok(WorkItemResult::NeedsFatLto(FatLtoInput::InMemory(module))),
         },
@@ -992,12 +995,24 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
 
 fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
-    needs_fat_lto: Vec<FatLtoInput<B>>,
+    exported_symbols_for_lto: &[String],
+    each_linked_rlib_for_lto: &[PathBuf],
+    mut needs_fat_lto: Vec<FatLtoInput<B>>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
     autodiff: Vec<AutoDiffItem>,
     module_config: &ModuleConfig,
 ) -> Result<WorkItemResult<B>, FatalError> {
-    let module = B::run_and_optimize_fat_lto(cgcx, needs_fat_lto, import_only_modules, autodiff)?;
+    for (module, wp) in import_only_modules {
+        needs_fat_lto.push(FatLtoInput::Serialized { name: wp.cgu_name, buffer: module })
+    }
+
+    let module = B::run_and_optimize_fat_lto(
+        cgcx,
+        exported_symbols_for_lto,
+        each_linked_rlib_for_lto,
+        needs_fat_lto,
+        autodiff,
+    )?;
     let module = B::codegen(cgcx, module, module_config)?;
     Ok(WorkItemResult::Finished(module))
 }
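
`execute_fat_lto_work_item` now folds the import-only modules into `needs_fat_lto` up front, so the backend receives one homogeneous list of fat-LTO inputs. A minimal sketch with local stand-in types (not rustc's):

```rust
// Local stand-in for FatLtoInput: serialized and in-memory modules
// end up in the same list before being handed to the backend.
enum FatLtoInput {
    Serialized { name: String, bytes: Vec<u8> },
    InMemory { name: String },
}

fn main() {
    let mut needs_fat_lto = vec![FatLtoInput::InMemory { name: "cgu.0".into() }];
    // Import-only modules arrive as (name, serialized bytes) pairs.
    let import_only: Vec<(String, Vec<u8>)> = vec![("cgu.1".into(), vec![0xde, 0xad])];

    for (name, bytes) in import_only {
        needs_fat_lto.push(FatLtoInput::Serialized { name, bytes });
    }

    assert_eq!(needs_fat_lto.len(), 2);
}
```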
@@ -1008,20 +1023,8 @@ fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
     module_config: &ModuleConfig,
 ) -> Result<WorkItemResult<B>, FatalError> {
     let module = B::optimize_thin(cgcx, module)?;
-    finish_intra_module_work(cgcx, module, module_config)
-}
-
-fn finish_intra_module_work<B: ExtraBackendMethods>(
-    cgcx: &CodegenContext<B>,
-    module: ModuleCodegen<B::Module>,
-    module_config: &ModuleConfig,
-) -> Result<WorkItemResult<B>, FatalError> {
-    if !cgcx.opts.unstable_opts.combine_cgu || module.kind == ModuleKind::Allocator {
-        let module = B::codegen(cgcx, module, module_config)?;
-        Ok(WorkItemResult::Finished(module))
-    } else {
-        Ok(WorkItemResult::NeedsLink(module))
-    }
+    let module = B::codegen(cgcx, module, module_config)?;
+    Ok(WorkItemResult::Finished(module))
 }
 
 /// Messages sent to the coordinator.
@@ -1032,7 +1035,7 @@ pub(crate) enum Message<B: WriteBackendMethods> {
 
     /// The backend has finished processing a work item for a codegen unit.
     /// Sent from a backend worker thread.
-    WorkItem { result: Result<WorkItemResult<B>, Option<WorkerFatalError>>, worker_id: usize },
+    WorkItem { result: Result<WorkItemResult<B>, Option<WorkerFatalError>> },
 
     /// The frontend has finished generating something (backend IR or a
     /// post-LTO artifact) for a codegen unit, and it should be passed to the
@@ -1103,52 +1106,28 @@ fn start_executing_work<B: ExtraBackendMethods>(
     autodiff_items: &[AutoDiffItem],
     shared_emitter: SharedEmitter,
     codegen_worker_send: Sender<CguMessage>,
-    coordinator_receive: Receiver<Box<dyn Any + Send>>,
+    coordinator_receive: Receiver<Message<B>>,
     regular_config: Arc<ModuleConfig>,
     allocator_config: Arc<ModuleConfig>,
-    tx_to_llvm_workers: Sender<Box<dyn Any + Send>>,
+    tx_to_llvm_workers: Sender<Message<B>>,
 ) -> thread::JoinHandle<Result<CompiledModules, ()>> {
     let coordinator_send = tx_to_llvm_workers;
     let sess = tcx.sess;
     let autodiff_items = autodiff_items.to_vec();
 
     let mut each_linked_rlib_for_lto = Vec::new();
+    let mut each_linked_rlib_file_for_lto = Vec::new();
     drop(link::each_linked_rlib(crate_info, None, &mut |cnum, path| {
         if link::ignored_for_lto(sess, crate_info, cnum) {
             return;
         }
-        each_linked_rlib_for_lto.push((cnum, path.to_path_buf()));
+        each_linked_rlib_for_lto.push(cnum);
+        each_linked_rlib_file_for_lto.push(path.to_path_buf());
     }));
 
     // Compute the set of symbols we need to retain when doing LTO (if we need to)
-    let exported_symbols = {
-        let mut exported_symbols = FxHashMap::default();
-
-        let copy_symbols = |cnum| {
-            let symbols = tcx
-                .exported_non_generic_symbols(cnum)
-                .iter()
-                .chain(tcx.exported_generic_symbols(cnum))
-                .map(|&(s, lvl)| (symbol_name_for_instance_in_crate(tcx, s, cnum), lvl))
-                .collect();
-            Arc::new(symbols)
-        };
-
-        match sess.lto() {
-            Lto::No => None,
-            Lto::ThinLocal => {
-                exported_symbols.insert(LOCAL_CRATE, copy_symbols(LOCAL_CRATE));
-                Some(Arc::new(exported_symbols))
-            }
-            Lto::Fat | Lto::Thin => {
-                exported_symbols.insert(LOCAL_CRATE, copy_symbols(LOCAL_CRATE));
-                for &(cnum, ref _path) in &each_linked_rlib_for_lto {
-                    exported_symbols.insert(cnum, copy_symbols(cnum));
-                }
-                Some(Arc::new(exported_symbols))
-            }
-        }
-    };
+    let exported_symbols_for_lto =
+        Arc::new(lto::exported_symbols_for_lto(tcx, &each_linked_rlib_for_lto));
 
     // First up, convert our jobserver into a helper thread so we can use normal
     // mpsc channels to manage our messages and such.
@@ -1158,7 +1137,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
     let coordinator_send2 = coordinator_send.clone();
     let helper = jobserver::client()
         .into_helper_thread(move |token| {
-            drop(coordinator_send2.send(Box::new(Message::Token::<B>(token))));
+            drop(coordinator_send2.send(Message::Token::<B>(token)));
         })
         .expect("failed to spawn helper thread");
 
@@ -1183,18 +1162,15 @@ fn start_executing_work<B: ExtraBackendMethods>(
 
     let cgcx = CodegenContext::<B> {
         crate_types: tcx.crate_types().to_vec(),
-        each_linked_rlib_for_lto,
         lto: sess.lto(),
         fewer_names: sess.fewer_names(),
         save_temps: sess.opts.cg.save_temps,
         time_trace: sess.opts.unstable_opts.llvm_time_trace,
         opts: Arc::new(sess.opts.clone()),
         prof: sess.prof.clone(),
-        exported_symbols,
         remark: sess.opts.cg.remark.clone(),
         remark_dir,
         incr_comp_session_dir: sess.incr_comp_session_dir_opt().map(|r| r.clone()),
-        coordinator_send,
         expanded_args: tcx.sess.expanded_args.clone(),
         diag_emitter: shared_emitter.clone(),
         output_filenames: Arc::clone(tcx.output_filenames(())),
@@ -1350,23 +1326,10 @@ fn start_executing_work<B: ExtraBackendMethods>(
     // necessary. There's already optimizations in place to avoid sending work
     // back to the coordinator if LTO isn't requested.
     return B::spawn_named_thread(cgcx.time_trace, "coordinator".to_string(), move || {
-        let mut worker_id_counter = 0;
-        let mut free_worker_ids = Vec::new();
-        let mut get_worker_id = |free_worker_ids: &mut Vec<usize>| {
-            if let Some(id) = free_worker_ids.pop() {
-                id
-            } else {
-                let id = worker_id_counter;
-                worker_id_counter += 1;
-                id
-            }
-        };
-
         // This is where we collect codegen units that have gone all the way
         // through codegen and LLVM.
         let mut compiled_modules = vec![];
         let mut compiled_allocator_module = None;
-        let mut needs_link = Vec::new();
         let mut needs_fat_lto = Vec::new();
         let mut needs_thin_lto = Vec::new();
         let mut lto_import_only_modules = Vec::new();
@@ -1442,12 +1405,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         let (item, _) =
                             work_items.pop().expect("queue empty - queue_full_enough() broken?");
                         main_thread_state = MainThreadState::Lending;
-                        spawn_work(
-                            &cgcx,
-                            &mut llvm_start_time,
-                            get_worker_id(&mut free_worker_ids),
-                            item,
-                        );
+                        spawn_work(&cgcx, coordinator_send.clone(), &mut llvm_start_time, item);
                     }
                 }
             } else if codegen_state == Completed {
@@ -1474,12 +1432,18 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     let needs_fat_lto = mem::take(&mut needs_fat_lto);
                     let needs_thin_lto = mem::take(&mut needs_thin_lto);
                     let import_only_modules = mem::take(&mut lto_import_only_modules);
+                    let each_linked_rlib_file_for_lto =
+                        mem::take(&mut each_linked_rlib_file_for_lto);
+
+                    check_lto_allowed(&cgcx);
 
                     if !needs_fat_lto.is_empty() {
                         assert!(needs_thin_lto.is_empty());
 
                         work_items.push((
                             WorkItem::FatLto {
+                                exported_symbols_for_lto: Arc::clone(&exported_symbols_for_lto),
+                                each_linked_rlib_for_lto: each_linked_rlib_file_for_lto,
                                 needs_fat_lto,
                                 import_only_modules,
                                 autodiff: autodiff_items.clone(),
@@ -1495,9 +1459,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             dcx.handle().emit_fatal(AutodiffWithoutLto {});
                         }
 
-                        for (work, cost) in
-                            generate_thin_lto_work(&cgcx, needs_thin_lto, import_only_modules)
-                        {
+                        for (work, cost) in generate_thin_lto_work(
+                            &cgcx,
+                            &exported_symbols_for_lto,
+                            &each_linked_rlib_file_for_lto,
+                            needs_thin_lto,
+                            import_only_modules,
+                        ) {
                             let insertion_index = work_items
                                 .binary_search_by_key(&cost, |&(_, cost)| cost)
                                 .unwrap_or_else(|e| e);
@@ -1516,12 +1484,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     MainThreadState::Idle => {
                         if let Some((item, _)) = work_items.pop() {
                             main_thread_state = MainThreadState::Lending;
-                            spawn_work(
-                                &cgcx,
-                                &mut llvm_start_time,
-                                get_worker_id(&mut free_worker_ids),
-                                item,
-                            );
+                            spawn_work(&cgcx, coordinator_send.clone(), &mut llvm_start_time, item);
                         } else {
                             // There is no unstarted work, so let the main thread
                             // take over for a running worker. Otherwise the
@@ -1557,12 +1520,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 while running_with_own_token < tokens.len()
                     && let Some((item, _)) = work_items.pop()
                 {
-                    spawn_work(
-                        &cgcx,
-                        &mut llvm_start_time,
-                        get_worker_id(&mut free_worker_ids),
-                        item,
-                    );
+                    spawn_work(&cgcx, coordinator_send.clone(), &mut llvm_start_time, item);
                     running_with_own_token += 1;
                 }
             }
@@ -1570,23 +1528,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
             // Relinquish accidentally acquired extra tokens.
             tokens.truncate(running_with_own_token);
 
-            // If a thread exits successfully then we drop a token associated
-            // with that worker and update our `running_with_own_token` count.
-            // We may later re-acquire a token to continue running more work.
-            // We may also not actually drop a token here if the worker was
-            // running with an "ephemeral token".
-            let mut free_worker = |worker_id| {
-                if main_thread_state == MainThreadState::Lending {
-                    main_thread_state = MainThreadState::Idle;
-                } else {
-                    running_with_own_token -= 1;
-                }
-
-                free_worker_ids.push(worker_id);
-            };
-
-            let msg = coordinator_receive.recv().unwrap();
-            match *msg.downcast::<Message<B>>().ok().unwrap() {
+            match coordinator_receive.recv().unwrap() {
                 // Save the token locally and the next turn of the loop will use
                 // this to spawn a new unit of work, or it may get dropped
                 // immediately if we have no more work to spawn.
@@ -1653,14 +1595,22 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     codegen_state = Aborted;
                 }
 
-                Message::WorkItem { result, worker_id } => {
-                    free_worker(worker_id);
+                Message::WorkItem { result } => {
+                    // If a thread exits successfully then we drop a token associated
+                    // with that worker and update our `running_with_own_token` count.
+                    // We may later re-acquire a token to continue running more work.
+                    // We may also not actually drop a token here if the worker was
+                    // running with an "ephemeral token".
+                    if main_thread_state == MainThreadState::Lending {
+                        main_thread_state = MainThreadState::Idle;
+                    } else {
+                        running_with_own_token -= 1;
+                    }
 
                     match result {
                         Ok(WorkItemResult::Finished(compiled_module)) => {
                             match compiled_module.kind {
                                 ModuleKind::Regular => {
-                                    assert!(needs_link.is_empty());
                                     compiled_modules.push(compiled_module);
                                 }
                                 ModuleKind::Allocator => {
@@ -1669,10 +1619,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
                                 }
                             }
                         }
-                        Ok(WorkItemResult::NeedsLink(module)) => {
-                            assert!(compiled_modules.is_empty());
-                            needs_link.push(module);
-                        }
                         Ok(WorkItemResult::NeedsFatLto(fat_lto_input)) => {
                             assert!(!started_lto);
                             assert!(needs_thin_lto.is_empty());
@@ -1709,17 +1655,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
             return Err(());
         }
 
-        let needs_link = mem::take(&mut needs_link);
-        if !needs_link.is_empty() {
-            assert!(compiled_modules.is_empty());
-            let dcx = cgcx.create_dcx();
-            let dcx = dcx.handle();
-            let module = B::run_link(&cgcx, dcx, needs_link).map_err(|_| ())?;
-            let module =
-                B::codegen(&cgcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
-            compiled_modules.push(module);
-        }
-
         // Drop to print timings
         drop(llvm_start_time);
 
@@ -1799,8 +1734,8 @@ pub(crate) struct WorkerFatalError;
 
 fn spawn_work<'a, B: ExtraBackendMethods>(
     cgcx: &'a CodegenContext<B>,
+    coordinator_send: Sender<Message<B>>,
     llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
-    worker_id: usize,
     work: WorkItem<B>,
 ) {
     if cgcx.config(work.module_kind()).time_module && llvm_start_time.is_none() {
@@ -1813,26 +1748,23 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
         // Set up a destructor which will fire off a message that we're done as
         // we exit.
         struct Bomb<B: ExtraBackendMethods> {
-            coordinator_send: Sender<Box<dyn Any + Send>>,
+            coordinator_send: Sender<Message<B>>,
             result: Option<Result<WorkItemResult<B>, FatalError>>,
-            worker_id: usize,
         }
         impl<B: ExtraBackendMethods> Drop for Bomb<B> {
             fn drop(&mut self) {
-                let worker_id = self.worker_id;
                 let msg = match self.result.take() {
-                    Some(Ok(result)) => Message::WorkItem::<B> { result: Ok(result), worker_id },
+                    Some(Ok(result)) => Message::WorkItem::<B> { result: Ok(result) },
                     Some(Err(FatalError)) => {
-                        Message::WorkItem::<B> { result: Err(Some(WorkerFatalError)), worker_id }
+                        Message::WorkItem::<B> { result: Err(Some(WorkerFatalError)) }
                     }
-                    None => Message::WorkItem::<B> { result: Err(None), worker_id },
+                    None => Message::WorkItem::<B> { result: Err(None) },
                 };
-                drop(self.coordinator_send.send(Box::new(msg)));
+                drop(self.coordinator_send.send(msg));
             }
         }
 
-        let mut bomb =
-            Bomb::<B> { coordinator_send: cgcx.coordinator_send.clone(), result: None, worker_id };
+        let mut bomb = Bomb::<B> { coordinator_send, result: None };
 
         // Execute the work itself, and if it finishes successfully then flag
         // ourselves as a success as well.
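
The `Bomb` type above is a drop guard: whichever path the worker takes out of the closure, including a panic, dropping the guard reports a result back to the coordinator. A minimal sketch of the pattern with simplified types (the `String` payload is hypothetical):

```rust
use std::sync::mpsc::{channel, Sender};

// Drop guard: always reports a result when it goes out of scope.
struct Bomb {
    sender: Sender<Result<String, ()>>,
    result: Option<Result<String, ()>>,
}

impl Drop for Bomb {
    fn drop(&mut self) {
        // `None` here means the worker unwound before setting a result.
        let msg = self.result.take().unwrap_or(Err(()));
        // Ignore send errors: the coordinator may already have shut down.
        let _ = self.sender.send(msg);
    }
}

fn main() {
    let (tx, rx) = channel();
    {
        let mut bomb = Bomb { sender: tx, result: None };
        // ... do the actual work ...
        bomb.result = Some(Ok("module compiled".to_string()));
    } // guard dropped here; a message is sent even on early return or panic
    assert_eq!(rx.recv().unwrap(), Ok("module compiled".to_string()));
}
```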
@@ -1856,12 +1788,20 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
                     );
                     Ok(execute_copy_from_cache_work_item(&cgcx, m, module_config))
                 }
-                WorkItem::FatLto { needs_fat_lto, import_only_modules, autodiff } => {
+                WorkItem::FatLto {
+                    exported_symbols_for_lto,
+                    each_linked_rlib_for_lto,
+                    needs_fat_lto,
+                    import_only_modules,
+                    autodiff,
+                } => {
                     let _timer = cgcx
                         .prof
                         .generic_activity_with_arg("codegen_module_perform_lto", "everything");
                     execute_fat_lto_work_item(
                         &cgcx,
+                        &exported_symbols_for_lto,
+                        &each_linked_rlib_for_lto,
                         needs_fat_lto,
                         import_only_modules,
                         autodiff,
@@ -2029,7 +1969,7 @@ impl SharedEmitterMain {
 }
 
 pub struct Coordinator<B: ExtraBackendMethods> {
-    pub sender: Sender<Box<dyn Any + Send>>,
+    sender: Sender<Message<B>>,
     future: Option<thread::JoinHandle<Result<CompiledModules, ()>>>,
     // Only used for the Message type.
     phantom: PhantomData<B>,
@@ -2046,7 +1986,7 @@ impl<B: ExtraBackendMethods> Drop for Coordinator<B> {
         if let Some(future) = self.future.take() {
             // If we haven't joined yet, signal to the coordinator that it should spawn no more
             // work, and wait for worker threads to finish.
-            drop(self.sender.send(Box::new(Message::CodegenAborted::<B>)));
+            drop(self.sender.send(Message::CodegenAborted::<B>));
             drop(future.join());
         }
     }
@@ -2105,7 +2045,7 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
     pub(crate) fn codegen_finished(&self, tcx: TyCtxt<'_>) {
         self.wait_for_signal_to_codegen_item();
         self.check_for_errors(tcx.sess);
-        drop(self.coordinator.sender.send(Box::new(Message::CodegenComplete::<B>)));
+        drop(self.coordinator.sender.send(Message::CodegenComplete::<B>));
     }
 
     pub(crate) fn check_for_errors(&self, sess: &Session) {
@@ -2126,28 +2066,25 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
 }
 
 pub(crate) fn submit_codegened_module_to_llvm<B: ExtraBackendMethods>(
-    _backend: &B,
-    tx_to_llvm_workers: &Sender<Box<dyn Any + Send>>,
+    coordinator: &Coordinator<B>,
     module: ModuleCodegen<B::Module>,
     cost: u64,
 ) {
     let llvm_work_item = WorkItem::Optimize(module);
-    drop(tx_to_llvm_workers.send(Box::new(Message::CodegenDone::<B> { llvm_work_item, cost })));
+    drop(coordinator.sender.send(Message::CodegenDone::<B> { llvm_work_item, cost }));
 }
 
 pub(crate) fn submit_post_lto_module_to_llvm<B: ExtraBackendMethods>(
-    _backend: &B,
-    tx_to_llvm_workers: &Sender<Box<dyn Any + Send>>,
+    coordinator: &Coordinator<B>,
     module: CachedModuleCodegen,
 ) {
     let llvm_work_item = WorkItem::CopyPostLtoArtifacts(module);
-    drop(tx_to_llvm_workers.send(Box::new(Message::CodegenDone::<B> { llvm_work_item, cost: 0 })));
+    drop(coordinator.sender.send(Message::CodegenDone::<B> { llvm_work_item, cost: 0 }));
 }
 
 pub(crate) fn submit_pre_lto_module_to_llvm<B: ExtraBackendMethods>(
-    _backend: &B,
     tcx: TyCtxt<'_>,
-    tx_to_llvm_workers: &Sender<Box<dyn Any + Send>>,
+    coordinator: &Coordinator<B>,
     module: CachedModuleCodegen,
 ) {
     let filename = pre_lto_bitcode_filename(&module.name);
@@ -2161,10 +2098,10 @@ pub(crate) fn submit_pre_lto_module_to_llvm<B: ExtraBackendMethods>(
         })
     };
     // Schedule the module to be loaded
-    drop(tx_to_llvm_workers.send(Box::new(Message::AddImportOnlyModule::<B> {
+    drop(coordinator.sender.send(Message::AddImportOnlyModule::<B> {
         module_data: SerializedModule::FromUncompressedFile(mmap),
         work_product: module.source,
-    })));
+    }));
 }
 
 fn pre_lto_bitcode_filename(module_name: &str) -> String {
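
A recurring theme in the write.rs changes above is replacing `Sender<Box<dyn Any + Send>>` with a typed `Sender<Message<B>>`: messages no longer need to be boxed on send or downcast on receive. A minimal sketch with a hypothetical `Message` enum:

```rust
use std::sync::mpsc;

// Typed channel: the receiver matches on variants directly,
// instead of receiving `Box<dyn Any>` and downcasting at runtime.
enum Message {
    Token(u32),
    WorkItem { result: Result<String, ()> },
}

fn main() {
    let (tx, rx) = mpsc::channel::<Message>();
    tx.send(Message::Token(1)).unwrap();
    tx.send(Message::WorkItem { result: Ok("cgu.0".into()) }).unwrap();
    drop(tx); // close the channel so the receive loop terminates

    for msg in rx {
        match msg {
            Message::Token(t) => println!("got token {t}"),
            Message::WorkItem { result } => println!("work item finished: {result:?}"),
        }
    }
}
```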
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 833456abb8a..a5807c56e31 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -702,8 +702,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
         // These modules are generally cheap and won't throw off scheduling.
         let cost = 0;
         submit_codegened_module_to_llvm(
-            &backend,
-            &ongoing_codegen.coordinator.sender,
+            &ongoing_codegen.coordinator,
             ModuleCodegen::new_allocator(llmod_id, module_llvm),
             cost,
         );
@@ -800,18 +799,12 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
                 // compilation hang on post-monomorphization errors.
                 tcx.dcx().abort_if_errors();
 
-                submit_codegened_module_to_llvm(
-                    &backend,
-                    &ongoing_codegen.coordinator.sender,
-                    module,
-                    cost,
-                );
+                submit_codegened_module_to_llvm(&ongoing_codegen.coordinator, module, cost);
             }
             CguReuse::PreLto => {
                 submit_pre_lto_module_to_llvm(
-                    &backend,
                     tcx,
-                    &ongoing_codegen.coordinator.sender,
+                    &ongoing_codegen.coordinator,
                     CachedModuleCodegen {
                         name: cgu.name().to_string(),
                         source: cgu.previous_work_product(tcx),
@@ -820,8 +813,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
             }
             CguReuse::PostLto => {
                 submit_post_lto_module_to_llvm(
-                    &backend,
-                    &ongoing_codegen.coordinator.sender,
+                    &ongoing_codegen.coordinator,
                     CachedModuleCodegen {
                         name: cgu.name().to_string(),
                         source: cgu.previous_work_product(tcx),
diff --git a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs
index dd49db26689..5c54bce6e03 100644
--- a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs
+++ b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs
@@ -4,12 +4,12 @@ use rustc_abi::{Align, ExternAbi};
 use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, DiffActivity, DiffMode};
 use rustc_ast::{LitKind, MetaItem, MetaItemInner, attr};
 use rustc_attr_data_structures::{
-    AttributeKind, InlineAttr, InstructionSetAttr, OptimizeAttr, UsedBy, find_attr,
+    AttributeKind, InlineAttr, InstructionSetAttr, UsedBy, find_attr,
 };
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::{DefId, LOCAL_CRATE, LocalDefId};
 use rustc_hir::weak_lang_items::WEAK_LANG_ITEMS;
-use rustc_hir::{self as hir, LangItem, lang_items};
+use rustc_hir::{self as hir, Attribute, LangItem, lang_items};
 use rustc_middle::middle::codegen_fn_attrs::{
     CodegenFnAttrFlags, CodegenFnAttrs, PatchableFunctionEntry,
 };
@@ -53,77 +53,196 @@ fn linkage_by_name(tcx: TyCtxt<'_>, def_id: LocalDefId, name: &str) -> Linkage {
     }
 }
 
-fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
-    if cfg!(debug_assertions) {
-        let def_kind = tcx.def_kind(did);
-        assert!(
-            def_kind.has_codegen_attrs(),
-            "unexpected `def_kind` in `codegen_fn_attrs`: {def_kind:?}",
-        );
+/// In some cases, attributes are only valid on functions, but it's the `check_attr`
+/// pass that checks that they aren't used anywhere else, rather than this module.
+/// In these cases, we bail from performing further checks that are only meaningful for
+/// functions (such as calling `fn_sig`, which ICEs if given a non-function). We also
+/// report a delayed bug, just in case `check_attr` isn't doing its job.
+fn try_fn_sig<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    did: LocalDefId,
+    attr_span: Span,
+) -> Option<ty::EarlyBinder<'tcx, ty::PolyFnSig<'tcx>>> {
+    use DefKind::*;
+
+    let def_kind = tcx.def_kind(did);
+    if let Fn | AssocFn | Variant | Ctor(..) = def_kind {
+        Some(tcx.fn_sig(did))
+    } else {
+        tcx.dcx().span_delayed_bug(attr_span, "this attribute can only be applied to functions");
+        None
     }
+}
 
-    let attrs = tcx.hir_attrs(tcx.local_def_id_to_hir_id(did));
-    let mut codegen_fn_attrs = CodegenFnAttrs::new();
-    if tcx.should_inherit_track_caller(did) {
-        codegen_fn_attrs.flags |= CodegenFnAttrFlags::TRACK_CALLER;
+// FIXME(jdonszelmann): remove when instruction_set becomes a parsed attr
+fn parse_instruction_set_attr(tcx: TyCtxt<'_>, attr: &Attribute) -> Option<InstructionSetAttr> {
+    let list = attr.meta_item_list()?;
+
+    match &list[..] {
+        [MetaItemInner::MetaItem(set)] => {
+            let segments = set.path.segments.iter().map(|x| x.ident.name).collect::<Vec<_>>();
+            match segments.as_slice() {
+                [sym::arm, sym::a32 | sym::t32] if !tcx.sess.target.has_thumb_interworking => {
+                    tcx.dcx().emit_err(errors::UnsupportedInstructionSet { span: attr.span() });
+                    None
+                }
+                [sym::arm, sym::a32] => Some(InstructionSetAttr::ArmA32),
+                [sym::arm, sym::t32] => Some(InstructionSetAttr::ArmT32),
+                _ => {
+                    tcx.dcx().emit_err(errors::InvalidInstructionSet { span: attr.span() });
+                    None
+                }
+            }
+        }
+        [] => {
+            tcx.dcx().emit_err(errors::BareInstructionSet { span: attr.span() });
+            None
+        }
+        _ => {
+            tcx.dcx().emit_err(errors::MultipleInstructionSet { span: attr.span() });
+            None
+        }
+    }
+}
+
+// FIXME(jdonszelmann): remove when linkage becomes a parsed attr
+fn parse_linkage_attr(tcx: TyCtxt<'_>, did: LocalDefId, attr: &Attribute) -> Option<Linkage> {
+    let val = attr.value_str()?;
+    let linkage = linkage_by_name(tcx, did, val.as_str());
+    Some(linkage)
+}
+
+// FIXME(jdonszelmann): remove when no_sanitize becomes a parsed attr
+fn parse_no_sanitize_attr(tcx: TyCtxt<'_>, attr: &Attribute) -> Option<SanitizerSet> {
+    let list = attr.meta_item_list()?;
+    let mut sanitizer_set = SanitizerSet::empty();
+
+    for item in list.iter() {
+        match item.name() {
+            Some(sym::address) => {
+                sanitizer_set |= SanitizerSet::ADDRESS | SanitizerSet::KERNELADDRESS
+            }
+            Some(sym::cfi) => sanitizer_set |= SanitizerSet::CFI,
+            Some(sym::kcfi) => sanitizer_set |= SanitizerSet::KCFI,
+            Some(sym::memory) => sanitizer_set |= SanitizerSet::MEMORY,
+            Some(sym::memtag) => sanitizer_set |= SanitizerSet::MEMTAG,
+            Some(sym::shadow_call_stack) => sanitizer_set |= SanitizerSet::SHADOWCALLSTACK,
+            Some(sym::thread) => sanitizer_set |= SanitizerSet::THREAD,
+            Some(sym::hwaddress) => sanitizer_set |= SanitizerSet::HWADDRESS,
+            _ => {
+                tcx.dcx().emit_err(errors::InvalidNoSanitize { span: item.span() });
+            }
+        }
     }
 
+    Some(sanitizer_set)
+}
+
+// FIXME(jdonszelmann): remove when patchable_function_entry becomes a parsed attr
+fn parse_patchable_function_entry(
+    tcx: TyCtxt<'_>,
+    attr: &Attribute,
+) -> Option<PatchableFunctionEntry> {
+    attr.meta_item_list().and_then(|l| {
+        let mut prefix = None;
+        let mut entry = None;
+        for item in l {
+            let Some(meta_item) = item.meta_item() else {
+                tcx.dcx().emit_err(errors::ExpectedNameValuePair { span: item.span() });
+                continue;
+            };
+
+            let Some(name_value_lit) = meta_item.name_value_literal() else {
+                tcx.dcx().emit_err(errors::ExpectedNameValuePair { span: item.span() });
+                continue;
+            };
+
+            let attrib_to_write = match meta_item.name() {
+                Some(sym::prefix_nops) => &mut prefix,
+                Some(sym::entry_nops) => &mut entry,
+                _ => {
+                    tcx.dcx().emit_err(errors::UnexpectedParameterName {
+                        span: item.span(),
+                        prefix_nops: sym::prefix_nops,
+                        entry_nops: sym::entry_nops,
+                    });
+                    continue;
+                }
+            };
+
+            let rustc_ast::LitKind::Int(val, _) = name_value_lit.kind else {
+                tcx.dcx().emit_err(errors::InvalidLiteralValue { span: name_value_lit.span });
+                continue;
+            };
+
+            let Ok(val) = val.get().try_into() else {
+                tcx.dcx().emit_err(errors::OutOfRangeInteger { span: name_value_lit.span });
+                continue;
+            };
+
+            *attrib_to_write = Some(val);
+        }
+
+        if let (None, None) = (prefix, entry) {
+            tcx.dcx().span_err(attr.span(), "must specify at least one parameter");
+        }
+
+        Some(PatchableFunctionEntry::from_prefix_and_entry(prefix.unwrap_or(0), entry.unwrap_or(0)))
+    })
+}
+
+/// Spans that are collected when processing built-in attributes,
+/// that are useful for emitting diagnostics later.
+#[derive(Default)]
+struct InterestingAttributeDiagnosticSpans {
+    link_ordinal: Option<Span>,
+    no_sanitize: Option<Span>,
+    inline: Option<Span>,
+    no_mangle: Option<Span>,
+}
+
+/// Process the builtin attrs ([`hir::Attribute`]) on the item.
+/// Many of them directly translate to codegen attrs.
+fn process_builtin_attrs(
+    tcx: TyCtxt<'_>,
+    did: LocalDefId,
+    attrs: &[Attribute],
+    codegen_fn_attrs: &mut CodegenFnAttrs,
+) -> InterestingAttributeDiagnosticSpans {
+    let mut interesting_spans = InterestingAttributeDiagnosticSpans::default();
+    let rust_target_features = tcx.rust_target_features(LOCAL_CRATE);
+
     // If our rustc version supports autodiff/enzyme, then we call our handler
     // to check for any `#[rustc_autodiff(...)]` attributes.
+    // FIXME(jdonszelmann): merge with loop below
     if cfg!(llvm_enzyme) {
         let ad = autodiff_attrs(tcx, did.into());
         codegen_fn_attrs.autodiff_item = ad;
     }
 
-    // When `no_builtins` is applied at the crate level, we should add the
-    // `no-builtins` attribute to each function to ensure it takes effect in LTO.
-    let crate_attrs = tcx.hir_attrs(rustc_hir::CRATE_HIR_ID);
-    let no_builtins = attr::contains_name(crate_attrs, sym::no_builtins);
-    if no_builtins {
-        codegen_fn_attrs.flags |= CodegenFnAttrFlags::NO_BUILTINS;
-    }
-
-    let rust_target_features = tcx.rust_target_features(LOCAL_CRATE);
-
-    let mut link_ordinal_span = None;
-    let mut no_sanitize_span = None;
-
     for attr in attrs.iter() {
-        // In some cases, attribute are only valid on functions, but it's the `check_attr`
-        // pass that check that they aren't used anywhere else, rather this module.
-        // In these cases, we bail from performing further checks that are only meaningful for
-        // functions (such as calling `fn_sig`, which ICEs if given a non-function). We also
-        // report a delayed bug, just in case `check_attr` isn't doing its job.
-        let fn_sig = |attr_span| {
-            use DefKind::*;
-
-            let def_kind = tcx.def_kind(did);
-            if let Fn | AssocFn | Variant | Ctor(..) = def_kind {
-                Some(tcx.fn_sig(did))
-            } else {
-                tcx.dcx()
-                    .span_delayed_bug(attr_span, "this attribute can only be applied to functions");
-                None
-            }
-        };
-
         if let hir::Attribute::Parsed(p) = attr {
             match p {
                 AttributeKind::Cold(_) => codegen_fn_attrs.flags |= CodegenFnAttrFlags::COLD,
                 AttributeKind::ExportName { name, .. } => {
-                    codegen_fn_attrs.export_name = Some(*name);
+                    codegen_fn_attrs.export_name = Some(*name)
+                }
+                AttributeKind::Inline(inline, span) => {
+                    codegen_fn_attrs.inline = *inline;
+                    interesting_spans.inline = Some(*span);
                 }
                 AttributeKind::Naked(_) => codegen_fn_attrs.flags |= CodegenFnAttrFlags::NAKED,
                 AttributeKind::Align { align, .. } => codegen_fn_attrs.alignment = Some(*align),
                 AttributeKind::LinkName { name, .. } => codegen_fn_attrs.link_name = Some(*name),
                 AttributeKind::LinkOrdinal { ordinal, span } => {
                     codegen_fn_attrs.link_ordinal = Some(*ordinal);
-                    link_ordinal_span = Some(*span);
+                    interesting_spans.link_ordinal = Some(*span);
                 }
                 AttributeKind::LinkSection { name, .. } => {
                     codegen_fn_attrs.link_section = Some(*name)
                 }
                 AttributeKind::NoMangle(attr_span) => {
+                    interesting_spans.no_mangle = Some(*attr_span);
                     if tcx.opt_item_name(did.to_def_id()).is_some() {
                         codegen_fn_attrs.flags |= CodegenFnAttrFlags::NO_MANGLE;
                     } else {
@@ -137,6 +256,7 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
                         });
                     }
                 }
+                AttributeKind::Optimize(optimize, _) => codegen_fn_attrs.optimize = *optimize,
                 AttributeKind::TargetFeature(features, attr_span) => {
                     let Some(sig) = tcx.hir_node_by_def_id(did).fn_sig() else {
                         tcx.dcx().span_delayed_bug(*attr_span, "target_feature applied to non-fn");
@@ -184,7 +304,7 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
                     let is_closure = tcx.is_closure_like(did.to_def_id());
 
                     if !is_closure
-                        && let Some(fn_sig) = fn_sig(*attr_span)
+                        && let Some(fn_sig) = try_fn_sig(tcx, did, *attr_span)
                         && fn_sig.skip_binder().abi() != ExternAbi::Rust
                     {
                         tcx.dcx().emit_err(errors::RequiresRustAbi { span: *attr_span });
@@ -232,155 +352,49 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
             }
             sym::thread_local => codegen_fn_attrs.flags |= CodegenFnAttrFlags::THREAD_LOCAL,
             sym::linkage => {
-                if let Some(val) = attr.value_str() {
-                    let linkage = Some(linkage_by_name(tcx, did, val.as_str()));
-                    if tcx.is_foreign_item(did) {
-                        codegen_fn_attrs.import_linkage = linkage;
-
-                        if tcx.is_mutable_static(did.into()) {
-                            let mut diag = tcx.dcx().struct_span_err(
-                                attr.span(),
-                                "extern mutable statics are not allowed with `#[linkage]`",
-                            );
-                            diag.note(
-                                "marking the extern static mutable would allow changing which \
-                                 symbol the static references rather than make the target of the \
-                                 symbol mutable",
-                            );
-                            diag.emit();
-                        }
-                    } else {
-                        codegen_fn_attrs.linkage = linkage;
+                let linkage = parse_linkage_attr(tcx, did, attr);
+
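+                // For foreign items the linkage controls how the symbol is
+                // imported; for local items it applies to the definition itself.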
+                if tcx.is_foreign_item(did) {
+                    codegen_fn_attrs.import_linkage = linkage;
+
+                    if tcx.is_mutable_static(did.into()) {
+                        let mut diag = tcx.dcx().struct_span_err(
+                            attr.span(),
+                            "extern mutable statics are not allowed with `#[linkage]`",
+                        );
+                        diag.note(
+                            "marking the extern static mutable would allow changing which \
+                            symbol the static references rather than make the target of the \
+                            symbol mutable",
+                        );
+                        diag.emit();
                     }
+                } else {
+                    codegen_fn_attrs.linkage = linkage;
                 }
             }
             sym::no_sanitize => {
-                no_sanitize_span = Some(attr.span());
-                if let Some(list) = attr.meta_item_list() {
-                    for item in list.iter() {
-                        match item.name() {
-                            Some(sym::address) => {
-                                codegen_fn_attrs.no_sanitize |=
-                                    SanitizerSet::ADDRESS | SanitizerSet::KERNELADDRESS
-                            }
-                            Some(sym::cfi) => codegen_fn_attrs.no_sanitize |= SanitizerSet::CFI,
-                            Some(sym::kcfi) => codegen_fn_attrs.no_sanitize |= SanitizerSet::KCFI,
-                            Some(sym::memory) => {
-                                codegen_fn_attrs.no_sanitize |= SanitizerSet::MEMORY
-                            }
-                            Some(sym::memtag) => {
-                                codegen_fn_attrs.no_sanitize |= SanitizerSet::MEMTAG
-                            }
-                            Some(sym::shadow_call_stack) => {
-                                codegen_fn_attrs.no_sanitize |= SanitizerSet::SHADOWCALLSTACK
-                            }
-                            Some(sym::thread) => {
-                                codegen_fn_attrs.no_sanitize |= SanitizerSet::THREAD
-                            }
-                            Some(sym::hwaddress) => {
-                                codegen_fn_attrs.no_sanitize |= SanitizerSet::HWADDRESS
-                            }
-                            _ => {
-                                tcx.dcx().emit_err(errors::InvalidNoSanitize { span: item.span() });
-                            }
-                        }
-                    }
-                }
+                interesting_spans.no_sanitize = Some(attr.span());
+                codegen_fn_attrs.no_sanitize |=
+                    parse_no_sanitize_attr(tcx, attr).unwrap_or_default();
             }
             sym::instruction_set => {
-                codegen_fn_attrs.instruction_set =
-                    attr.meta_item_list().and_then(|l| match &l[..] {
-                        [MetaItemInner::MetaItem(set)] => {
-                            let segments =
-                                set.path.segments.iter().map(|x| x.ident.name).collect::<Vec<_>>();
-                            match segments.as_slice() {
-                                [sym::arm, sym::a32 | sym::t32]
-                                    if !tcx.sess.target.has_thumb_interworking =>
-                                {
-                                    tcx.dcx().emit_err(errors::UnsupportedInstructionSet {
-                                        span: attr.span(),
-                                    });
-                                    None
-                                }
-                                [sym::arm, sym::a32] => Some(InstructionSetAttr::ArmA32),
-                                [sym::arm, sym::t32] => Some(InstructionSetAttr::ArmT32),
-                                _ => {
-                                    tcx.dcx().emit_err(errors::InvalidInstructionSet {
-                                        span: attr.span(),
-                                    });
-                                    None
-                                }
-                            }
-                        }
-                        [] => {
-                            tcx.dcx().emit_err(errors::BareInstructionSet { span: attr.span() });
-                            None
-                        }
-                        _ => {
-                            tcx.dcx()
-                                .emit_err(errors::MultipleInstructionSet { span: attr.span() });
-                            None
-                        }
-                    })
+                codegen_fn_attrs.instruction_set = parse_instruction_set_attr(tcx, attr)
             }
             sym::patchable_function_entry => {
-                codegen_fn_attrs.patchable_function_entry = attr.meta_item_list().and_then(|l| {
-                    let mut prefix = None;
-                    let mut entry = None;
-                    for item in l {
-                        let Some(meta_item) = item.meta_item() else {
-                            tcx.dcx().emit_err(errors::ExpectedNameValuePair { span: item.span() });
-                            continue;
-                        };
-
-                        let Some(name_value_lit) = meta_item.name_value_literal() else {
-                            tcx.dcx().emit_err(errors::ExpectedNameValuePair { span: item.span() });
-                            continue;
-                        };
-
-                        let attrib_to_write = match meta_item.name() {
-                            Some(sym::prefix_nops) => &mut prefix,
-                            Some(sym::entry_nops) => &mut entry,
-                            _ => {
-                                tcx.dcx().emit_err(errors::UnexpectedParameterName {
-                                    span: item.span(),
-                                    prefix_nops: sym::prefix_nops,
-                                    entry_nops: sym::entry_nops,
-                                });
-                                continue;
-                            }
-                        };
-
-                        let rustc_ast::LitKind::Int(val, _) = name_value_lit.kind else {
-                            tcx.dcx().emit_err(errors::InvalidLiteralValue {
-                                span: name_value_lit.span,
-                            });
-                            continue;
-                        };
-
-                        let Ok(val) = val.get().try_into() else {
-                            tcx.dcx()
-                                .emit_err(errors::OutOfRangeInteger { span: name_value_lit.span });
-                            continue;
-                        };
-
-                        *attrib_to_write = Some(val);
-                    }
-
-                    if let (None, None) = (prefix, entry) {
-                        tcx.dcx().span_err(attr.span(), "must specify at least one parameter");
-                    }
-
-                    Some(PatchableFunctionEntry::from_prefix_and_entry(
-                        prefix.unwrap_or(0),
-                        entry.unwrap_or(0),
-                    ))
-                })
+                codegen_fn_attrs.patchable_function_entry =
+                    parse_patchable_function_entry(tcx, attr);
             }
             _ => {}
         }
     }
 
+    interesting_spans
+}
+
+/// Applies overrides to the codegen fn attrs. Each override exists for a specific
+/// reason; please add a comment explaining that reason when adding a new one!
+fn apply_overrides(tcx: TyCtxt<'_>, did: LocalDefId, codegen_fn_attrs: &mut CodegenFnAttrs) {
     // Apply the minimum function alignment here. This ensures that a function's alignment is
     // determined by the `-C` flags of the crate it is defined in, not the `-C` flags of the crate
     // it happens to be codegen'd (or const-eval'd) in.
@@ -390,15 +404,6 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
     // On trait methods, inherit the `#[align]` of the trait's method prototype.
     codegen_fn_attrs.alignment = Ord::max(codegen_fn_attrs.alignment, tcx.inherited_align(did));
 
-    let inline_span;
-    (codegen_fn_attrs.inline, inline_span) = if let Some((inline_attr, span)) =
-        find_attr!(attrs, AttributeKind::Inline(i, span) => (*i, *span))
-    {
-        (inline_attr, Some(span))
-    } else {
-        (InlineAttr::None, None)
-    };
-
     // naked function MUST NOT be inlined! This attribute is required for the rust compiler itself,
     // but not for the code generation backend because at that point the naked function will just be
     // a declaration, with a definition provided in global assembly.
@@ -406,9 +411,6 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
         codegen_fn_attrs.inline = InlineAttr::Never;
     }
 
-    codegen_fn_attrs.optimize =
-        find_attr!(attrs, AttributeKind::Optimize(i, _) => *i).unwrap_or(OptimizeAttr::Default);
-
     // #73631: closures inherit `#[target_feature]` annotations
     //
     // If this closure is marked `#[inline(always)]`, simply skip adding `#[target_feature]`.
@@ -431,6 +433,26 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
         }
     }
 
+    // When `no_builtins` is applied at the crate level, we should add the
+    // `no-builtins` attribute to each function to ensure it takes effect in LTO.
+    let crate_attrs = tcx.hir_attrs(rustc_hir::CRATE_HIR_ID);
+    let no_builtins = attr::contains_name(crate_attrs, sym::no_builtins);
+    if no_builtins {
+        codegen_fn_attrs.flags |= CodegenFnAttrFlags::NO_BUILTINS;
+    }
+
+    // Inherit `#[track_caller]` when the trait method this item implements requires it.
+    if tcx.should_inherit_track_caller(did) {
+        codegen_fn_attrs.flags |= CodegenFnAttrFlags::TRACK_CALLER;
+    }
+}
+
+fn check_result(
+    tcx: TyCtxt<'_>,
+    did: LocalDefId,
+    interesting_spans: InterestingAttributeDiagnosticSpans,
+    codegen_fn_attrs: &CodegenFnAttrs,
+) {
     // If a function uses `#[target_feature]` it can't be inlined into general
     // purpose functions as they wouldn't have the right target features
     // enabled. For that reason we also forbid `#[inline(always)]` as it can't be
@@ -446,14 +468,16 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
     // llvm/llvm-project#70563).
     if !codegen_fn_attrs.target_features.is_empty()
         && matches!(codegen_fn_attrs.inline, InlineAttr::Always)
-        && let Some(span) = inline_span
+        && let Some(span) = interesting_spans.inline
     {
         tcx.dcx().span_err(span, "cannot use `#[inline(always)]` with `#[target_feature]`");
     }
 
+    // Warn that `#[inline(always)]` has no effect when `#[no_sanitize]` is present.
     if !codegen_fn_attrs.no_sanitize.is_empty()
         && codegen_fn_attrs.inline.always()
-        && let (Some(no_sanitize_span), Some(inline_span)) = (no_sanitize_span, inline_span)
+        && let (Some(no_sanitize_span), Some(inline_span)) =
+            (interesting_spans.no_sanitize, interesting_spans.inline)
     {
         let hir_id = tcx.local_def_id_to_hir_id(did);
         tcx.node_span_lint(lint::builtin::INLINE_NO_SANITIZE, hir_id, no_sanitize_span, |lint| {
@@ -462,6 +486,47 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
         })
     }
 
+    // Error when `#[link_name]` is specified together with `#[link_ordinal]`.
+    if codegen_fn_attrs.link_name.is_some()
+        && codegen_fn_attrs.link_ordinal.is_some()
+    {
+        let msg = "cannot use `#[link_name]` with `#[link_ordinal]`";
+        if let Some(span) = interesting_spans.link_ordinal {
+            tcx.dcx().span_err(span, msg);
+        } else {
+            tcx.dcx().err(msg);
+        }
+    }
+
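+    // Some target features are "tied" and must be enabled or disabled together
+    // (for example `paca` and `pacg` on AArch64); reject attribute lists that
+    // split such a pair.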
+    if let Some(features) = check_tied_features(
+        tcx.sess,
+        &codegen_fn_attrs
+            .target_features
+            .iter()
+            .map(|features| (features.name.as_str(), true))
+            .collect(),
+    ) {
+        let span =
+            find_attr!(tcx.get_all_attrs(did), AttributeKind::TargetFeature(_, span) => *span)
+                .unwrap_or_else(|| tcx.def_span(did));
+
+        tcx.dcx()
+            .create_err(errors::TargetFeatureDisableOrEnable {
+                features,
+                span: Some(span),
+                missing_features: Some(errors::MissingFeatures),
+            })
+            .emit();
+    }
+}
+
+fn handle_lang_items(
+    tcx: TyCtxt<'_>,
+    did: LocalDefId,
+    interesting_spans: &InterestingAttributeDiagnosticSpans,
+    attrs: &[Attribute],
+    codegen_fn_attrs: &mut CodegenFnAttrs,
+) {
     // Weak lang items have the same semantics as "std internal" symbols in the
     // sense that they're preserved through all our LTO passes and only
     // strippable by the linker.
@@ -478,68 +543,59 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
             codegen_fn_attrs.link_name = Some(link_name);
         }
     }
-    check_link_name_xor_ordinal(tcx, &codegen_fn_attrs, link_ordinal_span);
 
+    // Error when using `#[no_mangle]` on an internal lang item.
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::RUSTC_STD_INTERNAL_SYMBOL)
         && codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NO_MANGLE)
     {
-        let no_mangle_span =
-            find_attr!(attrs, AttributeKind::NoMangle(no_mangle_span) => *no_mangle_span)
-                .unwrap_or_default();
         let lang_item =
             lang_items::extract(attrs).map_or(None, |(name, _span)| LangItem::from_name(name));
         let mut err = tcx
             .dcx()
             .struct_span_err(
-                no_mangle_span,
+                interesting_spans.no_mangle.unwrap_or_default(),
                 "`#[no_mangle]` cannot be used on internal language items",
             )
             .with_note("Rustc requires this item to have a specific mangled name.")
             .with_span_label(tcx.def_span(did), "should be the internal language item");
-        if let Some(lang_item) = lang_item {
-            if let Some(link_name) = lang_item.link_name() {
-                err = err
-                    .with_note("If you are trying to prevent mangling to ease debugging, many")
-                    .with_note(format!(
-                        "debuggers support a command such as `rbreak {link_name}` to"
-                    ))
-                    .with_note(format!(
-                        "match `.*{link_name}.*` instead of `break {link_name}` on a specific name"
-                    ))
-            }
+        if let Some(lang_item) = lang_item
+            && let Some(link_name) = lang_item.link_name()
+        {
+            err = err
+                .with_note("If you are trying to prevent mangling to ease debugging, many")
+                .with_note(format!("debuggers support a command such as `rbreak {link_name}` to"))
+                .with_note(format!(
+                    "match `.*{link_name}.*` instead of `break {link_name}` on a specific name"
+                ))
         }
         err.emit();
     }
+}
 
-    // Any linkage to LLVM intrinsics for now forcibly marks them all as never
-    // unwinds since LLVM sometimes can't handle codegen which `invoke`s
-    // intrinsic functions.
-    if let Some(name) = &codegen_fn_attrs.link_name
-        && name.as_str().starts_with("llvm.")
-    {
-        codegen_fn_attrs.flags |= CodegenFnAttrFlags::NEVER_UNWIND;
+/// Generate the [`CodegenFnAttrs`] for an item (identified by the [`LocalDefId`]).
+///
+/// This happens in 4 stages:
+/// - apply built-in attributes that directly translate to codegen attributes.
+/// - handle lang items. These have special codegen attrs applied to them.
+/// - apply overrides, like minimum alignment requirements and other settings that don't derive directly from the item's built-in attrs.
+///   Overrides run after the built-in attributes since some of them only apply when certain attributes were already set in the stage before.
+/// - check that the result is valid. There are various ways in which it may not be, such as certain invalid combinations of attrs.
+fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
+    if cfg!(debug_assertions) {
+        let def_kind = tcx.def_kind(did);
+        assert!(
+            def_kind.has_codegen_attrs(),
+            "unexpected `def_kind` in `codegen_fn_attrs`: {def_kind:?}",
+        );
     }
 
-    if let Some(features) = check_tied_features(
-        tcx.sess,
-        &codegen_fn_attrs
-            .target_features
-            .iter()
-            .map(|features| (features.name.as_str(), true))
-            .collect(),
-    ) {
-        let span =
-            find_attr!(tcx.get_all_attrs(did), AttributeKind::TargetFeature(_, span) => *span)
-                .unwrap_or_else(|| tcx.def_span(did));
+    let mut codegen_fn_attrs = CodegenFnAttrs::new();
+    let attrs = tcx.hir_attrs(tcx.local_def_id_to_hir_id(did));
 
-        tcx.dcx()
-            .create_err(errors::TargetFeatureDisableOrEnable {
-                features,
-                span: Some(span),
-                missing_features: Some(errors::MissingFeatures),
-            })
-            .emit();
-    }
+    let interesting_spans = process_builtin_attrs(tcx, did, attrs, &mut codegen_fn_attrs);
+    handle_lang_items(tcx, did, &interesting_spans, attrs, &mut codegen_fn_attrs);
+    apply_overrides(tcx, did, &mut codegen_fn_attrs);
+    check_result(tcx, did, interesting_spans, &codegen_fn_attrs);
 
     codegen_fn_attrs
 }
@@ -566,27 +622,12 @@ fn inherited_align<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> Option<Align> {
     tcx.codegen_fn_attrs(opt_trait_item(tcx, def_id)?).alignment
 }
 
-fn check_link_name_xor_ordinal(
-    tcx: TyCtxt<'_>,
-    codegen_fn_attrs: &CodegenFnAttrs,
-    inline_span: Option<Span>,
-) {
-    if codegen_fn_attrs.link_name.is_none() || codegen_fn_attrs.link_ordinal.is_none() {
-        return;
-    }
-    let msg = "cannot use `#[link_name]` with `#[link_ordinal]`";
-    if let Some(span) = inline_span {
-        tcx.dcx().span_err(span, msg);
-    } else {
-        tcx.dcx().err(msg);
-    }
-}
-
 /// We now check the #\[rustc_autodiff\] attributes which we generated from the #[autodiff(...)]
 /// macros. There are two forms. The pure one without args to mark primal functions (the functions
 /// being differentiated). The other form is #[rustc_autodiff(Mode, ActivityList)] on top of the
 /// placeholder functions. We wrote the rustc_autodiff attributes ourself, so this should never
 /// panic, unless we introduced a bug when parsing the autodiff macro.
+// FIXME(jdonszelmann): fold this into the main attribute loop; there's no need to have two. Let's do that when autodiff becomes a parsed attribute.
 fn autodiff_attrs(tcx: TyCtxt<'_>, id: DefId) -> Option<AutoDiffAttrs> {
     let attrs = tcx.get_attrs(id, sym::rustc_autodiff);
 
diff --git a/compiler/rustc_codegen_ssa/src/common.rs b/compiler/rustc_codegen_ssa/src/common.rs
index 48565e0b4de..a6fd6c763ed 100644
--- a/compiler/rustc_codegen_ssa/src/common.rs
+++ b/compiler/rustc_codegen_ssa/src/common.rs
@@ -148,7 +148,7 @@ pub(crate) fn shift_mask_val<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
 pub fn asm_const_to_str<'tcx>(
     tcx: TyCtxt<'tcx>,
     sp: Span,
-    const_value: mir::ConstValue<'tcx>,
+    const_value: mir::ConstValue,
     ty_and_layout: TyAndLayout<'tcx>,
 ) -> String {
     let mir::ConstValue::Scalar(scalar) = const_value else {
diff --git a/compiler/rustc_codegen_ssa/src/errors.rs b/compiler/rustc_codegen_ssa/src/errors.rs
index 9040915b6af..3d787d8bdbd 100644
--- a/compiler/rustc_codegen_ssa/src/errors.rs
+++ b/compiler/rustc_codegen_ssa/src/errors.rs
@@ -1294,3 +1294,20 @@ pub(crate) struct FeatureNotValid<'a> {
     #[help]
     pub plus_hint: bool,
 }
+
+#[derive(Diagnostic)]
+#[diag(codegen_ssa_lto_disallowed)]
+pub(crate) struct LtoDisallowed;
+
+#[derive(Diagnostic)]
+#[diag(codegen_ssa_lto_dylib)]
+pub(crate) struct LtoDylib;
+
+#[derive(Diagnostic)]
+#[diag(codegen_ssa_lto_proc_macro)]
+pub(crate) struct LtoProcMacro;
+
+#[derive(Diagnostic)]
+#[diag(codegen_ssa_dynamic_linking_with_lto)]
+#[note]
+pub(crate) struct DynamicLinkingWithLTO;
diff --git a/compiler/rustc_codegen_ssa/src/mir/analyze.rs b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
index 6d6465dd798..c2c023af090 100644
--- a/compiler/rustc_codegen_ssa/src/mir/analyze.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
@@ -1,12 +1,13 @@
 //! An analysis to determine which locals require allocas and
 //! which do not.
 
+use rustc_abi as abi;
 use rustc_data_structures::graph::dominators::Dominators;
 use rustc_index::bit_set::DenseBitSet;
 use rustc_index::{IndexSlice, IndexVec};
 use rustc_middle::mir::visit::{MutatingUseContext, NonMutatingUseContext, PlaceContext, Visitor};
 use rustc_middle::mir::{self, DefLocation, Location, TerminatorKind, traversal};
-use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
+use rustc_middle::ty::layout::LayoutOf;
 use rustc_middle::{bug, span_bug};
 use tracing::debug;
 
@@ -99,63 +100,75 @@ impl<'a, 'b, 'tcx, Bx: BuilderMethods<'b, 'tcx>> LocalAnalyzer<'a, 'b, 'tcx, Bx>
         context: PlaceContext,
         location: Location,
     ) {
-        let cx = self.fx.cx;
-
-        if let Some((place_base, elem)) = place_ref.last_projection() {
-            let mut base_context = if context.is_mutating_use() {
-                PlaceContext::MutatingUse(MutatingUseContext::Projection)
-            } else {
-                PlaceContext::NonMutatingUse(NonMutatingUseContext::Projection)
-            };
-
-            // Allow uses of projections that are ZSTs or from scalar fields.
-            let is_consume = matches!(
-                context,
-                PlaceContext::NonMutatingUse(
-                    NonMutatingUseContext::Copy | NonMutatingUseContext::Move,
-                )
-            );
-            if is_consume {
-                let base_ty = place_base.ty(self.fx.mir, cx.tcx());
-                let base_ty = self.fx.monomorphize(base_ty);
-
-                // ZSTs don't require any actual memory access.
-                let elem_ty = base_ty.projection_ty(cx.tcx(), self.fx.monomorphize(elem)).ty;
-                let span = self.fx.mir.local_decls[place_ref.local].source_info.span;
-                if cx.spanned_layout_of(elem_ty, span).is_zst() {
-                    return;
+        if !place_ref.projection.is_empty() {
+            const COPY_CONTEXT: PlaceContext =
+                PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy);
+
+            // `PlaceElem::Index` is the only variant that can mention other `Local`s,
+            // so check for those up-front before any potential short-circuits.
+            for elem in place_ref.projection {
+                if let mir::PlaceElem::Index(index_local) = *elem {
+                    self.visit_local(index_local, COPY_CONTEXT, location);
                 }
+            }
 
-                if let mir::ProjectionElem::Field(..) = elem {
-                    let layout = cx.spanned_layout_of(base_ty.ty, span);
-                    if cx.is_backend_immediate(layout) || cx.is_backend_scalar_pair(layout) {
-                        // Recurse with the same context, instead of `Projection`,
-                        // potentially stopping at non-operand projections,
-                        // which would trigger `not_ssa` on locals.
-                        base_context = context;
-                    }
-                }
+            // If our local is already memory, nothing can make it *more* memory,
+            // so we don't need to bother checking the projections further.
+            if self.locals[place_ref.local] == LocalKind::Memory {
+                return;
             }
 
-            if let mir::ProjectionElem::Deref = elem {
-                // Deref projections typically only read the pointer.
-                base_context = PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy);
+            if place_ref.is_indirect_first_projection() {
+                // If this starts with a `Deref`, we only need to record a read of the
+                // pointer being dereferenced, as all the subsequent projections are
+                // working on a place which is always supported. (And because we're
+                // looking at codegen MIR, it can only happen as the first projection.)
+                self.visit_local(place_ref.local, COPY_CONTEXT, location);
+                return;
             }
 
-            self.process_place(&place_base, base_context, location);
-            // HACK(eddyb) this emulates the old `visit_projection_elem`, this
-            // entire `visit_place`-like `process_place` method should be rewritten,
-            // now that we have moved to the "slice of projections" representation.
-            if let mir::ProjectionElem::Index(local) = elem {
-                self.visit_local(
-                    local,
-                    PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy),
-                    location,
-                );
+            if context.is_mutating_use() {
+                // If it's a mutating use it doesn't matter what the projections are,
+                // if there are *any* then we need a place to write. (For example,
+                // `_1 = Foo()` works in SSA but `_2.0 = Foo()` does not.)
+                let mut_projection = PlaceContext::MutatingUse(MutatingUseContext::Projection);
+                self.visit_local(place_ref.local, mut_projection, location);
+                return;
             }
-        } else {
-            self.visit_local(place_ref.local, context, location);
+
+            // Scan through to ensure the only projections are those which
+            // `FunctionCx::maybe_codegen_consume_direct` can handle.
+            let base_ty = self.fx.monomorphized_place_ty(mir::PlaceRef::from(place_ref.local));
+            let mut layout = self.fx.cx.layout_of(base_ty);
+            for elem in place_ref.projection {
+                layout = match *elem {
+                    mir::PlaceElem::Field(fidx, ..) => layout.field(self.fx.cx, fidx.as_usize()),
+                    mir::PlaceElem::Downcast(_, vidx)
+                        if let abi::Variants::Single { index: single_variant } =
+                            layout.variants
+                            && vidx == single_variant =>
+                    {
+                        layout.for_variant(self.fx.cx, vidx)
+                    }
+                    mir::PlaceElem::Subtype(subtype_ty) => {
+                        let subtype_ty = self.fx.monomorphize(subtype_ty);
+                        self.fx.cx.layout_of(subtype_ty)
+                    }
+                    _ => {
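+                        // Any other projection (e.g. `Index`, `ConstantIndex`,
+                        // or a non-trivial `Downcast`) requires a real place,
+                        // so the local must live in memory.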
+                        self.locals[place_ref.local] = LocalKind::Memory;
+                        return;
+                    }
+                }
+            }
+            debug_assert!(
+                !self.fx.cx.is_backend_ref(layout),
+                "Post-projection {place_ref:?} layout should be non-Ref, but it's {layout:?}",
+            );
         }
+
+        // Even with supported projections, we still need to have `visit_local`
+        // check for things that can't be done in SSA (like `SharedBorrow`).
+        self.visit_local(place_ref.local, context, location);
     }
 }
 
@@ -170,11 +183,6 @@ impl<'a, 'b, 'tcx, Bx: BuilderMethods<'b, 'tcx>> Visitor<'tcx> for LocalAnalyzer
 
         if let Some(local) = place.as_local() {
             self.define(local, DefLocation::Assignment(location));
-            if self.locals[local] != LocalKind::Memory {
-                if !self.fx.rvalue_creates_operand(rvalue) {
-                    self.locals[local] = LocalKind::Memory;
-                }
-            }
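+            // Whether the rvalue can be built as an operand is no longer decided
+            // here; `codegen_rvalue_operand` now handles every rvalue, spilling
+            // through a temporary alloca when needed.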
         } else {
             self.visit_place(place, PlaceContext::MutatingUse(MutatingUseContext::Store), location);
         }
diff --git a/compiler/rustc_codegen_ssa/src/mir/constant.rs b/compiler/rustc_codegen_ssa/src/mir/constant.rs
index 68a56044a07..11b6ab3cdf1 100644
--- a/compiler/rustc_codegen_ssa/src/mir/constant.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/constant.rs
@@ -20,7 +20,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         OperandRef::from_const(bx, val, ty)
     }
 
-    pub fn eval_mir_constant(&self, constant: &mir::ConstOperand<'tcx>) -> mir::ConstValue<'tcx> {
+    pub fn eval_mir_constant(&self, constant: &mir::ConstOperand<'tcx>) -> mir::ConstValue {
         // `MirUsedCollector` visited all required_consts before codegen began, so if we got here
         // there can be no more constants that fail to evaluate.
         self.monomorphize(constant.const_)
diff --git a/compiler/rustc_codegen_ssa/src/mir/operand.rs b/compiler/rustc_codegen_ssa/src/mir/operand.rs
index 06bedaaa4a2..5459f95c186 100644
--- a/compiler/rustc_codegen_ssa/src/mir/operand.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -140,7 +140,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
 
     pub(crate) fn from_const<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
         bx: &mut Bx,
-        val: mir::ConstValue<'tcx>,
+        val: mir::ConstValue,
         ty: Ty<'tcx>,
     ) -> Self {
         let layout = bx.layout_of(ty);
@@ -154,14 +154,11 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
                 OperandValue::Immediate(llval)
             }
             ConstValue::ZeroSized => return OperandRef::zero_sized(layout),
-            ConstValue::Slice { data, meta } => {
+            ConstValue::Slice { alloc_id, meta } => {
                 let BackendRepr::ScalarPair(a_scalar, _) = layout.backend_repr else {
                     bug!("from_const: invalid ScalarPair layout: {:#?}", layout);
                 };
-                let a = Scalar::from_pointer(
-                    Pointer::new(bx.tcx().reserve_and_set_memory_alloc(data).into(), Size::ZERO),
-                    &bx.tcx(),
-                );
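+                // The slice constant now carries its `AllocId` directly, so the
+                // data pointer can be built without reserving a new allocation.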
+                let a = Scalar::from_pointer(Pointer::new(alloc_id.into(), Size::ZERO), &bx.tcx());
                 let a_llval = bx.scalar_to_backend(
                     a,
                     a_scalar,
@@ -338,13 +335,6 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
 
         let val = if field.is_zst() {
             OperandValue::ZeroSized
-        } else if let BackendRepr::SimdVector { .. } = self.layout.backend_repr {
-            // codegen_transmute_operand doesn't support SIMD, but since the previous
-            // check handled ZSTs, the only possible field access into something SIMD
-            // is to the `non_1zst_field` that's the same SIMD. (Other things, even
-            // just padding, would change the wrapper's representation type.)
-            assert_eq!(field.size, self.layout.size);
-            self.val
         } else if field.size == self.layout.size {
             assert_eq!(offset.bytes(), 0);
             fx.codegen_transmute_operand(bx, *self, field)
@@ -931,9 +921,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
 
         match self.locals[place_ref.local] {
             LocalRef::Operand(mut o) => {
-                // Moves out of scalar and scalar pair fields are trivial.
-                for elem in place_ref.projection.iter() {
-                    match elem {
+                // We only need to handle the projections that
+                // `LocalAnalyzer::process_place` allows to reach this point.
+                for elem in place_ref.projection {
+                    match *elem {
                         mir::ProjectionElem::Field(f, _) => {
                             assert!(
                                 !o.layout.ty.is_any_ptr(),
@@ -942,17 +933,18 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                             );
                             o = o.extract_field(self, bx, f.index());
                         }
-                        mir::ProjectionElem::Index(_)
-                        | mir::ProjectionElem::ConstantIndex { .. } => {
-                            // ZSTs don't require any actual memory access.
-                            // FIXME(eddyb) deduplicate this with the identical
-                            // checks in `codegen_consume` and `extract_field`.
-                            let elem = o.layout.field(bx.cx(), 0);
-                            if elem.is_zst() {
-                                o = OperandRef::zero_sized(elem);
-                            } else {
-                                return None;
-                            }
+                        mir::PlaceElem::Downcast(_, vidx) => {
+                            debug_assert_eq!(
+                                o.layout.variants,
+                                abi::Variants::Single { index: vidx },
+                            );
+                            let layout = o.layout.for_variant(bx.cx(), vidx);
+                            o = OperandRef { val: o.val, layout }
+                        }
+                        mir::PlaceElem::Subtype(subtype_ty) => {
+                            let subtype_ty = self.monomorphize(subtype_ty);
+                            let layout = self.cx.layout_of(subtype_ty);
+                            o = OperandRef { val: o.val, layout }
                         }
                         _ => return None,
                     }
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 610e2fd2311..8a67b8d6e5f 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -2,12 +2,12 @@ use rustc_abi::{self as abi, FIRST_VARIANT};
 use rustc_middle::ty::adjustment::PointerCoercion;
 use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutOf, TyAndLayout};
 use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
-use rustc_middle::{bug, mir};
+use rustc_middle::{bug, mir, span_bug};
 use rustc_session::config::OptLevel;
 use tracing::{debug, instrument};
 
 use super::operand::{OperandRef, OperandRefBuilder, OperandValue};
-use super::place::{PlaceRef, codegen_tag_value};
+use super::place::{PlaceRef, PlaceValue, codegen_tag_value};
 use super::{FunctionCx, LocalRef};
 use crate::common::{IntPredicate, TypeKind};
 use crate::traits::*;
@@ -180,7 +180,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             }
 
             _ => {
-                assert!(self.rvalue_creates_operand(rvalue));
                 let temp = self.codegen_rvalue_operand(bx, rvalue);
                 temp.val.store(bx, dest);
             }
@@ -218,17 +217,26 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
 
     /// Transmutes an `OperandValue` to another `OperandValue`.
     ///
-    /// This is supported only for cases where [`Self::rvalue_creates_operand`]
-    /// returns `true`, and will ICE otherwise. (In particular, anything that
-    /// would need to `alloca` in order to return a `PlaceValue` will ICE,
-    /// expecting those to go via [`Self::codegen_transmute`] instead where
-    /// the destination place is already allocated.)
+    /// This is supported for all cases where the `cast` type is SSA,
+    /// but for non-ZSTs with [`abi::BackendRepr::Memory`] it ICEs.
     pub(crate) fn codegen_transmute_operand(
         &mut self,
         bx: &mut Bx,
         operand: OperandRef<'tcx, Bx::Value>,
         cast: TyAndLayout<'tcx>,
     ) -> OperandValue<Bx::Value> {
+        if let abi::BackendRepr::Memory { .. } = cast.backend_repr
+            && !cast.is_zst()
+        {
+            span_bug!(self.mir.span, "Use `codegen_transmute` to transmute to {cast:?}");
+        }
+
+        // `Layout` is interned, so we can do a cheap check for things that are
+        // exactly the same and thus don't need any handling.
+        if abi::Layout::eq(&operand.layout.layout, &cast.layout) {
+            return operand.val;
+        }
+
         // Check for transmutes that are always UB.
         if operand.layout.size != cast.size
             || operand.layout.is_uninhabited()
@@ -241,11 +249,22 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             return OperandValue::poison(bx, cast);
         }
 
+        // Transmuting to or from pointers requires different methods, so we use
+        // this to restrict the SimdVector case to types which can be `bitcast`
+        // between each other.
+        #[inline]
+        fn vector_can_bitcast(x: abi::Scalar) -> bool {
+            matches!(
+                x,
+                abi::Scalar::Initialized {
+                    value: abi::Primitive::Int(..) | abi::Primitive::Float(..),
+                    ..
+                }
+            )
+        }
+
+        let cx = bx.cx();
         match (operand.val, operand.layout.backend_repr, cast.backend_repr) {
             _ if cast.is_zst() => OperandValue::ZeroSized,
-            (_, _, abi::BackendRepr::Memory { .. }) => {
-                bug!("Cannot `codegen_transmute_operand` to non-ZST memory-ABI output {cast:?}");
-            }
             (OperandValue::Ref(source_place_val), abi::BackendRepr::Memory { .. }, _) => {
                 assert_eq!(source_place_val.llextra, None);
                 // The existing alignment is part of `source_place_val`,
@@ -256,16 +275,46 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 OperandValue::Immediate(imm),
                 abi::BackendRepr::Scalar(from_scalar),
                 abi::BackendRepr::Scalar(to_scalar),
-            ) => OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar)),
+            ) if from_scalar.size(cx) == to_scalar.size(cx) => {
+                OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar))
+            }
+            (
+                OperandValue::Immediate(imm),
+                abi::BackendRepr::SimdVector { element: from_scalar, .. },
+                abi::BackendRepr::SimdVector { element: to_scalar, .. },
+            ) if vector_can_bitcast(from_scalar) && vector_can_bitcast(to_scalar) => {
+                let to_backend_ty = bx.cx().immediate_backend_type(cast);
+                OperandValue::Immediate(bx.bitcast(imm, to_backend_ty))
+            }
             (
                 OperandValue::Pair(imm_a, imm_b),
                 abi::BackendRepr::ScalarPair(in_a, in_b),
                 abi::BackendRepr::ScalarPair(out_a, out_b),
-            ) => OperandValue::Pair(
-                transmute_scalar(bx, imm_a, in_a, out_a),
-                transmute_scalar(bx, imm_b, in_b, out_b),
-            ),
-            _ => bug!("Cannot `codegen_transmute_operand` {operand:?} to {cast:?}"),
+            ) if in_a.size(cx) == out_a.size(cx) && in_b.size(cx) == out_b.size(cx) => {
+                OperandValue::Pair(
+                    transmute_scalar(bx, imm_a, in_a, out_a),
+                    transmute_scalar(bx, imm_b, in_b, out_b),
+                )
+            }
+            _ => {
+                // For any other potentially-tricky cases, make a temporary instead.
+                // If anything else wants the target local to be in memory this won't
+                // be hit, as `codegen_transmute` will get called directly. Thus this
+                // is only for places where everything else wants the operand form,
+                // and thus it's not worth making those places get it from memory.
+                //
+                // Notably, Scalar ⇌ ScalarPair cases go here to avoid padding
+                // and endianness issues, as do SimdVector ones to avoid worrying
+                // about things like f32x8 ⇌ ptrx4 that would need multiple steps.
+                let align = Ord::max(operand.layout.align.abi, cast.align.abi);
+                let size = Ord::max(operand.layout.size, cast.size);
+                let temp = PlaceValue::alloca(bx, size, align);
+                bx.lifetime_start(temp.llval, size);
+                operand.val.store(bx, temp.with_type(operand.layout));
+                let val = bx.load_operand(temp.with_type(cast)).val;
+                bx.lifetime_end(temp.llval, size);
+                val
+            }
         }
     }
 
@@ -288,7 +337,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         // valid ranges. For example, `char`s are passed as just `i32`, with no
         // way for LLVM to know that they're 0x10FFFF at most. Thus we assume
         // the range of the input value too, not just the output range.
-        assume_scalar_range(bx, imm, from_scalar, from_backend_ty);
+        assume_scalar_range(bx, imm, from_scalar, from_backend_ty, None);
 
         imm = match (from_scalar.primitive(), to_scalar.primitive()) {
             (Int(_, is_signed), Int(..)) => bx.intcast(imm, to_backend_ty, is_signed),
@@ -326,8 +375,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         bx: &mut Bx,
         rvalue: &mir::Rvalue<'tcx>,
     ) -> OperandRef<'tcx, Bx::Value> {
-        assert!(self.rvalue_creates_operand(rvalue), "cannot codegen {rvalue:?} to operand",);
-
         match *rvalue {
             mir::Rvalue::Cast(ref kind, ref source, mir_cast_ty) => {
                 let operand = self.codegen_operand(bx, source);
@@ -653,8 +700,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 let ty = self.monomorphize(ty);
                 let layout = self.cx.layout_of(ty);
 
-                // `rvalue_creates_operand` has arranged that we only get here if
-                // we can build the aggregate immediate from the field immediates.
                 let mut builder = OperandRefBuilder::new(layout);
                 for (field_idx, field) in fields.iter_enumerated() {
                     let op = self.codegen_operand(bx, field);
@@ -955,69 +1000,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
 
         OperandValue::Pair(val, of)
     }
-
-    /// Returns `true` if the `rvalue` can be computed into an [`OperandRef`],
-    /// rather than needing a full `PlaceRef` for the assignment destination.
-    ///
-    /// This is used by the [`super::analyze`] code to decide which MIR locals
-    /// can stay as SSA values (as opposed to generating `alloca` slots for them).
-    /// As such, some paths here return `true` even where the specific rvalue
-    /// will not actually take the operand path because the result type is such
-    /// that it always gets an `alloca`, but where it's not worth re-checking the
-    /// layout in this code when the right thing will happen anyway.
-    pub(crate) fn rvalue_creates_operand(&self, rvalue: &mir::Rvalue<'tcx>) -> bool {
-        match *rvalue {
-            mir::Rvalue::Cast(mir::CastKind::Transmute, ref operand, cast_ty) => {
-                let operand_ty = operand.ty(self.mir, self.cx.tcx());
-                let cast_layout = self.cx.layout_of(self.monomorphize(cast_ty));
-                let operand_layout = self.cx.layout_of(self.monomorphize(operand_ty));
-                match (operand_layout.backend_repr, cast_layout.backend_repr) {
-                    // When the output will be in memory anyway, just use its place
-                    // (instead of the operand path) unless it's the trivial ZST case.
-                    (_, abi::BackendRepr::Memory { .. }) => cast_layout.is_zst(),
-
-                    // Otherwise (for a non-memory output) if the input is memory
-                    // then we can just read the value from the place.
-                    (abi::BackendRepr::Memory { .. }, _) => true,
-
-                    // When we have scalar immediates, we can only convert things
-                    // where the sizes match, to avoid endianness questions.
-                    (abi::BackendRepr::Scalar(a), abi::BackendRepr::Scalar(b)) =>
-                        a.size(self.cx) == b.size(self.cx),
-                    (abi::BackendRepr::ScalarPair(a0, a1), abi::BackendRepr::ScalarPair(b0, b1)) =>
-                        a0.size(self.cx) == b0.size(self.cx) && a1.size(self.cx) == b1.size(self.cx),
-
-                    // Mixing Scalars and ScalarPairs can get quite complicated when
-                    // padding and undef get involved, so leave that to the memory path.
-                    (abi::BackendRepr::Scalar(_), abi::BackendRepr::ScalarPair(_, _)) |
-                    (abi::BackendRepr::ScalarPair(_, _), abi::BackendRepr::Scalar(_)) => false,
-
-                    // SIMD vectors aren't worth the trouble of dealing with complex
-                    // cases like from vectors of f32 to vectors of pointers or
-                    // from fat pointers to vectors of u16. (See #143194 #110021 ...)
-                    (abi::BackendRepr::SimdVector { .. }, _) | (_, abi::BackendRepr::SimdVector { .. }) => false,
-                }
-            }
-            mir::Rvalue::Ref(..) |
-            mir::Rvalue::CopyForDeref(..) |
-            mir::Rvalue::RawPtr(..) |
-            mir::Rvalue::Len(..) |
-            mir::Rvalue::Cast(..) | // (*)
-            mir::Rvalue::ShallowInitBox(..) | // (*)
-            mir::Rvalue::BinaryOp(..) |
-            mir::Rvalue::UnaryOp(..) |
-            mir::Rvalue::Discriminant(..) |
-            mir::Rvalue::NullaryOp(..) |
-            mir::Rvalue::ThreadLocalRef(_) |
-            mir::Rvalue::Use(..) |
-            mir::Rvalue::Repeat(..) | // (*)
-            mir::Rvalue::Aggregate(..) | // (*)
-            mir::Rvalue::WrapUnsafeBinder(..) => // (*)
-                true,
-        }
-
-        // (*) this is only true if the type is suitable
-    }
 }
 
 /// Transmutes a single scalar value `imm` from `from_scalar` to `to_scalar`.
@@ -1064,7 +1046,7 @@ pub(super) fn transmute_scalar<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     // That said, last time we tried removing this, it didn't actually help
     // the rustc-perf results, so might as well keep doing it
     // <https://github.com/rust-lang/rust/pull/135610#issuecomment-2599275182>
-    assume_scalar_range(bx, imm, from_scalar, from_backend_ty);
+    assume_scalar_range(bx, imm, from_scalar, from_backend_ty, Some(&to_scalar));
 
     imm = match (from_scalar.primitive(), to_scalar.primitive()) {
         (Int(..) | Float(_), Int(..) | Float(_)) => bx.bitcast(imm, to_backend_ty),
@@ -1092,22 +1074,42 @@ pub(super) fn transmute_scalar<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     // since it's never passed to something with parameter metadata (especially
     // after MIR inlining) so the only way to tell the backend about the
     // constraint that the `transmute` introduced is to `assume` it.
-    assume_scalar_range(bx, imm, to_scalar, to_backend_ty);
+    assume_scalar_range(bx, imm, to_scalar, to_backend_ty, Some(&from_scalar));
 
     imm = bx.to_immediate_scalar(imm, to_scalar);
     imm
 }
 
+/// Emits an `assume` call asserting that `imm`'s value lies within the known range of `scalar`.
+///
+/// If `known` is `Some`, the `assume` is only emitted when it is more specific
+/// than whatever is already known from the range of *that* scalar.
 fn assume_scalar_range<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     bx: &mut Bx,
     imm: Bx::Value,
     scalar: abi::Scalar,
     backend_ty: Bx::Type,
+    known: Option<&abi::Scalar>,
 ) {
-    if matches!(bx.cx().sess().opts.optimize, OptLevel::No) || scalar.is_always_valid(bx.cx()) {
+    if matches!(bx.cx().sess().opts.optimize, OptLevel::No) {
         return;
     }
 
+    match (scalar, known) {
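+        // Union scalars carry no validity invariant, so there is nothing to assume.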
+        (abi::Scalar::Union { .. }, _) => return,
+        (_, None) => {
+            if scalar.is_always_valid(bx.cx()) {
+                return;
+            }
+        }
+        (abi::Scalar::Initialized { valid_range, .. }, Some(known)) => {
+            let known_range = known.valid_range(bx.cx());
+            if valid_range.contains_range(known_range, scalar.size(bx.cx())) {
+                return;
+            }
+        }
+    }
+
     match scalar.primitive() {
         abi::Primitive::Int(..) => {
             let range = scalar.valid_range(bx.cx());
diff --git a/compiler/rustc_codegen_ssa/src/traits/write.rs b/compiler/rustc_codegen_ssa/src/traits/write.rs
index 5e993640472..f391c198e1a 100644
--- a/compiler/rustc_codegen_ssa/src/traits/write.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/write.rs
@@ -1,3 +1,5 @@
+use std::path::PathBuf;
+
 use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_errors::{DiagCtxtHandle, FatalError};
 use rustc_middle::dep_graph::WorkProduct;
@@ -14,18 +16,13 @@ pub trait WriteBackendMethods: Clone + 'static {
     type ThinData: Send + Sync;
     type ThinBuffer: ThinBufferMethods;
 
-    /// Merge all modules into main_module and returning it
-    fn run_link(
-        cgcx: &CodegenContext<Self>,
-        dcx: DiagCtxtHandle<'_>,
-        modules: Vec<ModuleCodegen<Self::Module>>,
-    ) -> Result<ModuleCodegen<Self::Module>, FatalError>;
     /// Performs fat LTO by merging all modules into a single one, running autodiff
     /// if necessary and running any further optimizations
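+    ///
+    /// `exported_symbols_for_lto` lists the symbols that must stay exported (and
+    /// thus survive internalization), while `each_linked_rlib_for_lto` names the
+    /// rlibs whose bitcode participates in the LTO step.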
     fn run_and_optimize_fat_lto(
         cgcx: &CodegenContext<Self>,
+        exported_symbols_for_lto: &[String],
+        each_linked_rlib_for_lto: &[PathBuf],
         modules: Vec<FatLtoInput<Self>>,
-        cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
         diff_fncs: Vec<AutoDiffItem>,
     ) -> Result<ModuleCodegen<Self::Module>, FatalError>;
     /// Performs thin LTO by performing necessary global analysis and returning two
@@ -33,6 +30,8 @@ pub trait WriteBackendMethods: Clone + 'static {
     /// can simply be copied over from the incr. comp. cache.
     fn run_thin_lto(
         cgcx: &CodegenContext<Self>,
+        exported_symbols_for_lto: &[String],
+        each_linked_rlib_for_lto: &[PathBuf],
         modules: Vec<(String, Self::ThinBuffer)>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
     ) -> Result<(Vec<ThinModule<Self>>, Vec<WorkProduct>), FatalError>;