Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
 compiler/rustc_codegen_llvm/src/abi.rs | 17
 compiler/rustc_codegen_llvm/src/attributes.rs | 50
 compiler/rustc_codegen_llvm/src/back/lto.rs | 14
 compiler/rustc_codegen_llvm/src/back/write.rs | 77
 compiler/rustc_codegen_llvm/src/builder.rs | 62
 compiler/rustc_codegen_llvm/src/builder/autodiff.rs | 423
 compiler/rustc_codegen_llvm/src/common.rs | 16
 compiler/rustc_codegen_llvm/src/consts.rs | 9
 compiler/rustc_codegen_llvm/src/context.rs | 74
 compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs | 19
 compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs | 16
 compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs | 299
 compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs | 85
 compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs | 75
 compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs | 170
 compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs | 4
 compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs | 207
 compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs | 53
 compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs | 6
 compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs | 39
 compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs | 53
 compiler/rustc_codegen_llvm/src/debuginfo/mod.rs | 45
 compiler/rustc_codegen_llvm/src/errors.rs | 4
 compiler/rustc_codegen_llvm/src/intrinsic.rs | 125
 compiler/rustc_codegen_llvm/src/lib.rs | 6
 compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs | 6
 compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs | 20
 compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs | 48
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 186
 compiler/rustc_codegen_llvm/src/llvm/mod.rs | 20
 compiler/rustc_codegen_llvm/src/llvm_util.rs | 22
 compiler/rustc_codegen_llvm/src/mono_item.rs | 2
 compiler/rustc_codegen_llvm/src/va_arg.rs | 2
 33 files changed, 1309 insertions(+), 945 deletions(-)
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 71059338151..8294e29d07d 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -17,14 +17,13 @@ use rustc_target::callconv::{
 use rustc_target::spec::SanitizerSet;
 use smallvec::SmallVec;
 
-use crate::attributes::llfn_attrs_from_instance;
+use crate::attributes::{self, llfn_attrs_from_instance};
 use crate::builder::Builder;
 use crate::context::CodegenCx;
 use crate::llvm::{self, Attribute, AttributePlace};
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
-use crate::{attributes, llvm_util};
 
 trait ArgAttributesExt {
     fn apply_attrs_to_llfn(&self, idx: AttributePlace, cx: &CodegenCx<'_, '_>, llfn: &Value);
@@ -437,7 +436,6 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
 
         let apply_range_attr = |idx: AttributePlace, scalar: rustc_abi::Scalar| {
             if cx.sess().opts.optimize != config::OptLevel::No
-                && llvm_util::get_version() >= (19, 0, 0)
                 && matches!(scalar.primitive(), Primitive::Int(..))
                 // If the value is a boolean, the range is 0..2 and that ultimately
                 // become 0..0 when the type becomes i1, which would be rejected
@@ -571,19 +569,6 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
             }
             _ => {}
         }
-        if bx.cx.sess().opts.optimize != config::OptLevel::No
-                && llvm_util::get_version() < (19, 0, 0)
-                && let BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr
-                && matches!(scalar.primitive(), Primitive::Int(..))
-                // If the value is a boolean, the range is 0..2 and that ultimately
-                // become 0..0 when the type becomes i1, which would be rejected
-                // by the LLVM verifier.
-                && !scalar.is_bool()
-                // LLVM also rejects full range.
-                && !scalar.is_always_valid(bx)
-        {
-            bx.range_metadata(callsite, scalar.valid_range(bx));
-        }
         for arg in self.args.iter() {
             match &arg.mode {
                 PassMode::Ignore => {}
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index 3d7afa17bdf..e8c42d16733 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -407,30 +407,28 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         // Do not set sanitizer attributes for naked functions.
         to_add.extend(sanitize_attrs(cx, codegen_fn_attrs.no_sanitize));
 
-        if llvm_util::get_version() >= (19, 0, 0) {
-            // For non-naked functions, set branch protection attributes on aarch64.
-            if let Some(BranchProtection { bti, pac_ret }) =
-                cx.sess().opts.unstable_opts.branch_protection
-            {
-                assert!(cx.sess().target.arch == "aarch64");
-                if bti {
-                    to_add.push(llvm::CreateAttrString(cx.llcx, "branch-target-enforcement"));
-                }
-                if let Some(PacRet { leaf, pc, key }) = pac_ret {
-                    if pc {
-                        to_add.push(llvm::CreateAttrString(cx.llcx, "branch-protection-pauth-lr"));
-                    }
-                    to_add.push(llvm::CreateAttrStringValue(
-                        cx.llcx,
-                        "sign-return-address",
-                        if leaf { "all" } else { "non-leaf" },
-                    ));
-                    to_add.push(llvm::CreateAttrStringValue(
-                        cx.llcx,
-                        "sign-return-address-key",
-                        if key == PAuthKey::A { "a_key" } else { "b_key" },
-                    ));
+        // For non-naked functions, set branch protection attributes on aarch64.
+        if let Some(BranchProtection { bti, pac_ret }) =
+            cx.sess().opts.unstable_opts.branch_protection
+        {
+            assert!(cx.sess().target.arch == "aarch64");
+            if bti {
+                to_add.push(llvm::CreateAttrString(cx.llcx, "branch-target-enforcement"));
+            }
+            if let Some(PacRet { leaf, pc, key }) = pac_ret {
+                if pc {
+                    to_add.push(llvm::CreateAttrString(cx.llcx, "branch-protection-pauth-lr"));
                 }
+                to_add.push(llvm::CreateAttrStringValue(
+                    cx.llcx,
+                    "sign-return-address",
+                    if leaf { "all" } else { "non-leaf" },
+                ));
+                to_add.push(llvm::CreateAttrStringValue(
+                    cx.llcx,
+                    "sign-return-address-key",
+                    if key == PAuthKey::A { "a_key" } else { "b_key" },
+                ));
             }
         }
     }
@@ -510,12 +508,6 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
             InstructionSetAttr::ArmA32 => "-thumb-mode".to_string(),
             InstructionSetAttr::ArmT32 => "+thumb-mode".to_string(),
         }))
-        // HACK: LLVM versions 19+ do not have the FPMR feature and treat it as always enabled
-        // It only exists as a feature in LLVM 18, cannot be passed down for any other version
-        .chain(match &*cx.tcx.sess.target.arch {
-            "aarch64" if llvm_util::get_version().0 == 18 => vec!["+fpmr".to_string()],
-            _ => vec![],
-        })
         .collect::<Vec<String>>();
 
     if cx.tcx.sess.target.is_like_wasm {
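
For context, here is a minimal standalone sketch of the attribute mapping that the hunk above now applies unconditionally on aarch64 (the `PacRet`/`BranchProtection` structs below are simplified stand-ins, not the compiler's real definitions):

// Sketch only: simplified stand-ins for rustc's BranchProtection/PacRet/PAuthKey.
struct PacRet { leaf: bool, pc: bool, key_a: bool }
struct BranchProtection { bti: bool, pac_ret: Option<PacRet> }

// The (name, optional value) string attributes attached to each non-naked function.
fn branch_protection_attrs(bp: &BranchProtection) -> Vec<(&'static str, Option<&'static str>)> {
    let mut out = Vec::new();
    if bp.bti {
        out.push(("branch-target-enforcement", None));
    }
    if let Some(p) = &bp.pac_ret {
        if p.pc {
            out.push(("branch-protection-pauth-lr", None));
        }
        out.push(("sign-return-address", Some(if p.leaf { "all" } else { "non-leaf" })));
        out.push(("sign-return-address-key", Some(if p.key_a { "a_key" } else { "b_key" })));
    }
    out
}

fn main() {
    let bp = BranchProtection { bti: true, pac_ret: Some(PacRet { leaf: false, pc: false, key_a: true }) };
    for (name, value) in branch_protection_attrs(&bp) {
        match value {
            Some(v) => println!("{name}={v}"),
            None => println!("{name}"),
        }
    }
}
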
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index 668795191a2..a8b49e9552c 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -610,6 +610,8 @@ fn enable_autodiff_settings(ad: &[config::AutoDiff], module: &mut ModuleCodegen<
             }
             // We handle this below
             config::AutoDiff::PrintModAfter => {}
+            // We handle this below
+            config::AutoDiff::PrintModFinal => {}
             // This is required and already checked
             config::AutoDiff::Enable => {}
         }
@@ -657,14 +659,20 @@ pub(crate) fn run_pass_manager(
     }
 
     if cfg!(llvm_enzyme) && enable_ad {
+        // This is the post-autodiff IR, mainly used for testing and educational purposes.
+        if config.autodiff.contains(&config::AutoDiff::PrintModAfter) {
+            unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
+        }
+
         let opt_stage = llvm::OptStage::FatLTO;
         let stage = write::AutodiffStage::PostAD;
         unsafe {
             write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
         }
 
-        // This is the final IR, so people should be able to inspect the optimized autodiff output.
-        if config.autodiff.contains(&config::AutoDiff::PrintModAfter) {
+        // This is the final IR, so people should be able to manually inspect the
+        // optimized autodiff output.
+        if config.autodiff.contains(&config::AutoDiff::PrintModFinal) {
             unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
         }
     }
@@ -728,7 +736,7 @@ impl ThinBuffer {
         }
     }
 
-    pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
+    pub(crate) unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
         let mut ptr = NonNull::new(ptr).unwrap();
         ThinBuffer(unsafe { ptr.as_mut() })
     }
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index bead4c82a81..18d221d232e 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -119,14 +119,18 @@ pub(crate) fn create_target_machine(tcx: TyCtxt<'_>, mod_name: &str) -> OwnedTar
         tcx.output_filenames(()).split_dwarf_path(
             tcx.sess.split_debuginfo(),
             tcx.sess.opts.unstable_opts.split_dwarf_kind,
-            Some(mod_name),
+            mod_name,
+            tcx.sess.invocation_temp.as_deref(),
         )
     } else {
         None
     };
 
-    let output_obj_file =
-        Some(tcx.output_filenames(()).temp_path(OutputType::Object, Some(mod_name)));
+    let output_obj_file = Some(tcx.output_filenames(()).temp_path_for_cgu(
+        OutputType::Object,
+        mod_name,
+        tcx.sess.invocation_temp.as_deref(),
+    ));
     let config = TargetMachineFactoryConfig { split_dwarf_file, output_obj_file };
 
     target_machine_factory(
@@ -330,8 +334,11 @@ pub(crate) fn save_temp_bitcode(
         return;
     }
     let ext = format!("{name}.bc");
-    let cgu = Some(&module.name[..]);
-    let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
+    let path = cgcx.output_filenames.temp_path_ext_for_cgu(
+        &ext,
+        &module.name,
+        cgcx.invocation_temp.as_deref(),
+    );
     write_bitcode_to_file(module, &path)
 }
 
@@ -439,12 +446,9 @@ fn report_inline_asm(
     let span = if cookie == 0 || matches!(cgcx.lto, Lto::Fat | Lto::Thin) {
         SpanData::default()
     } else {
-        let lo = BytePos::from_u32(cookie as u32);
-        let hi = BytePos::from_u32((cookie >> 32) as u32);
         SpanData {
-            lo,
-            // LLVM version < 19 silently truncates the cookie to 32 bits in some situations.
-            hi: if hi.to_u32() != 0 { hi } else { lo },
+            lo: BytePos::from_u32(cookie as u32),
+            hi: BytePos::from_u32((cookie >> 32) as u32),
             ctxt: SyntaxContext::root(),
             parent: None,
         }
@@ -697,11 +701,12 @@ pub(crate) unsafe fn optimize(
     let llcx = &*module.module_llvm.llcx;
     let _handlers = DiagnosticHandlers::new(cgcx, dcx, llcx, module, CodegenDiagnosticsStage::Opt);
 
-    let module_name = module.name.clone();
-    let module_name = Some(&module_name[..]);
-
     if config.emit_no_opt_bc {
-        let out = cgcx.output_filenames.temp_path_ext("no-opt.bc", module_name);
+        let out = cgcx.output_filenames.temp_path_ext_for_cgu(
+            "no-opt.bc",
+            &module.name,
+            cgcx.invocation_temp.as_deref(),
+        );
         write_bitcode_to_file(module, &out)
     }
 
@@ -746,8 +751,11 @@ pub(crate) unsafe fn optimize(
         if let Some(thin_lto_buffer) = thin_lto_buffer {
             let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
             module.thin_lto_buffer = Some(thin_lto_buffer.data().to_vec());
-            let bc_summary_out =
-                cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
+            let bc_summary_out = cgcx.output_filenames.temp_path_for_cgu(
+                OutputType::ThinLinkBitcode,
+                &module.name,
+                cgcx.invocation_temp.as_deref(),
+            );
             if config.emit_thin_lto_summary
                 && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
             {
@@ -804,8 +812,6 @@ pub(crate) unsafe fn codegen(
         let llmod = module.module_llvm.llmod();
         let llcx = &*module.module_llvm.llcx;
         let tm = &*module.module_llvm.tm;
-        let module_name = module.name.clone();
-        let module_name = Some(&module_name[..]);
         let _handlers =
             DiagnosticHandlers::new(cgcx, dcx, llcx, &module, CodegenDiagnosticsStage::Codegen);
 
@@ -817,8 +823,16 @@ pub(crate) unsafe fn codegen(
         // copy it to the .o file, and delete the bitcode if it wasn't
         // otherwise requested.
 
-        let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
-        let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
+        let bc_out = cgcx.output_filenames.temp_path_for_cgu(
+            OutputType::Bitcode,
+            &module.name,
+            cgcx.invocation_temp.as_deref(),
+        );
+        let obj_out = cgcx.output_filenames.temp_path_for_cgu(
+            OutputType::Object,
+            &module.name,
+            cgcx.invocation_temp.as_deref(),
+        );
 
         if config.bitcode_needed() {
             if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
@@ -860,7 +874,11 @@ pub(crate) unsafe fn codegen(
         if config.emit_ir {
             let _timer =
                 cgcx.prof.generic_activity_with_arg("LLVM_module_codegen_emit_ir", &*module.name);
-            let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name);
+            let out = cgcx.output_filenames.temp_path_for_cgu(
+                OutputType::LlvmAssembly,
+                &module.name,
+                cgcx.invocation_temp.as_deref(),
+            );
             let out_c = path_to_c_string(&out);
 
             extern "C" fn demangle_callback(
@@ -902,7 +920,11 @@ pub(crate) unsafe fn codegen(
         if config.emit_asm {
             let _timer =
                 cgcx.prof.generic_activity_with_arg("LLVM_module_codegen_emit_asm", &*module.name);
-            let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
+            let path = cgcx.output_filenames.temp_path_for_cgu(
+                OutputType::Assembly,
+                &module.name,
+                cgcx.invocation_temp.as_deref(),
+            );
 
             // We can't use the same module for asm and object code output,
             // because that triggers various errors like invalid IR or broken
@@ -932,7 +954,9 @@ pub(crate) unsafe fn codegen(
                     .prof
                     .generic_activity_with_arg("LLVM_module_codegen_emit_obj", &*module.name);
 
-                let dwo_out = cgcx.output_filenames.temp_path_dwo(module_name);
+                let dwo_out = cgcx
+                    .output_filenames
+                    .temp_path_dwo_for_cgu(&module.name, cgcx.invocation_temp.as_deref());
                 let dwo_out = match (cgcx.split_debuginfo, cgcx.split_dwarf_kind) {
                     // Don't change how DWARF is emitted when disabled.
                     (SplitDebuginfo::Off, _) => None,
@@ -997,6 +1021,7 @@ pub(crate) unsafe fn codegen(
         config.emit_asm,
         config.emit_ir,
         &cgcx.output_filenames,
+        cgcx.invocation_temp.as_deref(),
     ))
 }
 
@@ -1024,7 +1049,7 @@ fn create_section_with_flags_asm(section_name: &str, section_flags: &str, data:
 }
 
 pub(crate) fn bitcode_section_name(cgcx: &CodegenContext<LlvmCodegenBackend>) -> &'static CStr {
-    if cgcx.target_is_like_osx {
+    if cgcx.target_is_like_darwin {
         c"__LLVM,__bitcode"
     } else if cgcx.target_is_like_aix {
         c".ipa"
@@ -1077,7 +1102,7 @@ unsafe fn embed_bitcode(
     // and COFF we emit the sections using module level inline assembly for that
     // reason (see issue #90326 for historical background).
     unsafe {
-        if cgcx.target_is_like_osx
+        if cgcx.target_is_like_darwin
             || cgcx.target_is_like_aix
             || cgcx.target_arch == "wasm32"
             || cgcx.target_arch == "wasm64"
@@ -1096,7 +1121,7 @@ unsafe fn embed_bitcode(
             let llglobal =
                 llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.cmdline");
             llvm::set_initializer(llglobal, llconst);
-            let section = if cgcx.target_is_like_osx {
+            let section = if cgcx.target_is_like_darwin {
                 c"__LLVM,__cmdline"
             } else if cgcx.target_is_like_aix {
                 c".info"
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 55d34f5f2ef..27f7f95f100 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -14,6 +14,7 @@ use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_hir::def_id::DefId;
+use rustc_middle::bug;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::layout::{
     FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers,
@@ -29,6 +30,7 @@ use smallvec::SmallVec;
 use tracing::{debug, instrument};
 
 use crate::abi::FnAbiLlvmExt;
+use crate::attributes;
 use crate::common::Funclet;
 use crate::context::{CodegenCx, FullCx, GenericCx, SCx};
 use crate::llvm::{
@@ -37,7 +39,6 @@ use crate::llvm::{
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
-use crate::{attributes, llvm_util};
 
 #[must_use]
 pub(crate) struct GenericBuilder<'a, 'll, CX: Borrow<SCx<'ll>>> {
@@ -122,7 +123,7 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
 /// Empty string, to be used where LLVM expects an instruction name, indicating
 /// that the instruction is to be left unnamed (i.e. numbered, in textual IR).
 // FIXME(eddyb) pass `&CStr` directly to FFI once it's a thin pointer.
-const UNNAMED: *const c_char = c"".as_ptr();
+pub(crate) const UNNAMED: *const c_char = c"".as_ptr();
 
 impl<'ll, CX: Borrow<SCx<'ll>>> BackendTypes for GenericBuilder<'_, 'll, CX> {
     type Value = <GenericCx<'ll, CX> as BackendTypes>::Value;
@@ -926,11 +927,9 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         debug_assert_ne!(self.val_ty(val), dest_ty);
 
         let trunc = self.trunc(val, dest_ty);
-        if llvm_util::get_version() >= (19, 0, 0) {
-            unsafe {
-                if llvm::LLVMIsAInstruction(trunc).is_some() {
-                    llvm::LLVMSetNUW(trunc, True);
-                }
+        unsafe {
+            if llvm::LLVMIsAInstruction(trunc).is_some() {
+                llvm::LLVMSetNUW(trunc, True);
             }
         }
         trunc
@@ -940,11 +939,9 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         debug_assert_ne!(self.val_ty(val), dest_ty);
 
         let trunc = self.trunc(val, dest_ty);
-        if llvm_util::get_version() >= (19, 0, 0) {
-            unsafe {
-                if llvm::LLVMIsAInstruction(trunc).is_some() {
-                    llvm::LLVMSetNSW(trunc, True);
-                }
+        unsafe {
+            if llvm::LLVMIsAInstruction(trunc).is_some() {
+                llvm::LLVMSetNSW(trunc, True);
             }
         }
         trunc
@@ -1074,6 +1071,35 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unsafe { llvm::LLVMBuildFCmp(self.llbuilder, op as c_uint, lhs, rhs, UNNAMED) }
     }
 
+    fn three_way_compare(
+        &mut self,
+        ty: Ty<'tcx>,
+        lhs: Self::Value,
+        rhs: Self::Value,
+    ) -> Option<Self::Value> {
+        // FIXME: See comment on the definition of `three_way_compare`.
+        if crate::llvm_util::get_version() < (20, 0, 0) {
+            return None;
+        }
+
+        let name = match (ty.is_signed(), ty.primitive_size(self.tcx).bits()) {
+            (true, 8) => "llvm.scmp.i8.i8",
+            (true, 16) => "llvm.scmp.i8.i16",
+            (true, 32) => "llvm.scmp.i8.i32",
+            (true, 64) => "llvm.scmp.i8.i64",
+            (true, 128) => "llvm.scmp.i8.i128",
+
+            (false, 8) => "llvm.ucmp.i8.i8",
+            (false, 16) => "llvm.ucmp.i8.i16",
+            (false, 32) => "llvm.ucmp.i8.i32",
+            (false, 64) => "llvm.ucmp.i8.i64",
+            (false, 128) => "llvm.ucmp.i8.i128",
+
+            _ => bug!("three-way compare unsupported for type {ty:?}"),
+        };
+        Some(self.call_intrinsic(name, &[lhs, rhs]))
+    }
+
     /* Miscellaneous instructions */
     fn memcpy(
         &mut self,
@@ -1869,10 +1895,6 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         hash: &'ll Value,
         bitmap_bits: &'ll Value,
     ) {
-        assert!(
-            crate::llvm_util::get_version() >= (19, 0, 0),
-            "MCDC intrinsics require LLVM 19 or later"
-        );
         self.call_intrinsic("llvm.instrprof.mcdc.parameters", &[fn_name, hash, bitmap_bits]);
     }
 
@@ -1884,10 +1906,6 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         bitmap_index: &'ll Value,
         mcdc_temp: &'ll Value,
     ) {
-        assert!(
-            crate::llvm_util::get_version() >= (19, 0, 0),
-            "MCDC intrinsics require LLVM 19 or later"
-        );
         let args = &[fn_name, hash, bitmap_index, mcdc_temp];
         self.call_intrinsic("llvm.instrprof.mcdc.tvbitmap.update", args);
     }
@@ -1899,10 +1917,6 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
     #[instrument(level = "debug", skip(self))]
     pub(crate) fn mcdc_condbitmap_update(&mut self, cond_index: &'ll Value, mcdc_temp: &'ll Value) {
-        assert!(
-            crate::llvm_util::get_version() >= (19, 0, 0),
-            "MCDC intrinsics require LLVM 19 or later"
-        );
         let align = self.tcx.data_layout.i32_align.abi;
         let current_tv_index = self.load(self.cx.type_i32(), mcdc_temp, align);
         let new_tv_index = self.add(current_tv_index, cond_index);
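
As background for the new `three_way_compare` lowering, here is a standalone sketch of the value the `llvm.scmp.*`/`llvm.ucmp.*` intrinsics are documented to produce (a three-way comparison returning -1, 0, or +1 as an i8); this illustrates the intrinsic semantics and is not compiler code:

// Sketch only: what `llvm.scmp.i8.i32(lhs, rhs)` is expected to return.
fn scmp_i32(lhs: i32, rhs: i32) -> i8 {
    (lhs > rhs) as i8 - (lhs < rhs) as i8
}

// Unsigned variant, matching `llvm.ucmp.i8.i32`.
fn ucmp_u32(lhs: u32, rhs: u32) -> i8 {
    (lhs > rhs) as i8 - (lhs < rhs) as i8
}

fn main() {
    assert_eq!(scmp_i32(-1, 1), -1);
    assert_eq!(scmp_i32(1, 1), 0);
    assert_eq!(ucmp_u32(2, 1), 1);
}

The helper returns `None` on LLVM versions before 20, presumably so that callers keep using the existing generic lowering there.
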
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index 71705ecb4d0..e7c071d05aa 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -3,12 +3,14 @@ use std::ptr;
 use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode};
 use rustc_codegen_ssa::ModuleCodegen;
 use rustc_codegen_ssa::back::write::ModuleConfig;
-use rustc_codegen_ssa::traits::BaseTypeCodegenMethods as _;
+use rustc_codegen_ssa::common::TypeKind;
+use rustc_codegen_ssa::traits::BaseTypeCodegenMethods;
 use rustc_errors::FatalError;
+use rustc_middle::bug;
 use tracing::{debug, trace};
 
 use crate::back::write::llvm_err;
-use crate::builder::SBuilder;
+use crate::builder::{SBuilder, UNNAMED};
 use crate::context::SimpleCx;
 use crate::declare::declare_simple_fn;
 use crate::errors::{AutoDiffWithoutEnable, LlvmError};
@@ -18,14 +20,264 @@ use crate::value::Value;
 use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, llvm};
 
 fn get_params(fnc: &Value) -> Vec<&Value> {
+    let param_num = llvm::LLVMCountParams(fnc) as usize;
+    let mut fnc_args: Vec<&Value> = vec![];
+    fnc_args.reserve(param_num);
     unsafe {
-        let param_num = llvm::LLVMCountParams(fnc) as usize;
-        let mut fnc_args: Vec<&Value> = vec![];
-        fnc_args.reserve(param_num);
         llvm::LLVMGetParams(fnc, fnc_args.as_mut_ptr());
         fnc_args.set_len(param_num);
-        fnc_args
     }
+    fnc_args
+}
+
+fn has_sret(fnc: &Value) -> bool {
+    let num_args = llvm::LLVMCountParams(fnc) as usize;
+    if num_args == 0 {
+        false
+    } else {
+        unsafe { llvm::LLVMRustHasAttributeAtIndex(fnc, 0, llvm::AttributeKind::StructRet) }
+    }
+}
+
+// When we call the `__enzyme_autodiff` or `__enzyme_fwddiff` function, we need to pass all the
+// original inputs, as well as metadata and the additional shadow arguments.
+// This function matches the arguments from the outer function to the inner enzyme call.
+//
+// This function also considers that Rust level arguments do not always match the llvm-ir level
+// arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
+// llvm-ir level. The number of activities matches the number of Rust level arguments, so we
+// need to match those.
+// FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
+// using iterators and peek()?
+fn match_args_from_caller_to_enzyme<'ll>(
+    cx: &SimpleCx<'ll>,
+    builder: &SBuilder<'ll, 'll>,
+    width: u32,
+    args: &mut Vec<&'ll llvm::Value>,
+    inputs: &[DiffActivity],
+    outer_args: &[&'ll llvm::Value],
+    has_sret: bool,
+) {
+    debug!("matching autodiff arguments");
+    // We now handle the issue that Rust level arguments do not always match the llvm-ir level
+    // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
+    // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
+    // need to match those.
+    // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
+    // using iterators and peek()?
+    let mut outer_pos: usize = 0;
+    let mut activity_pos = 0;
+
+    if has_sret {
+        // Then the first outer arg is the sret pointer. Enzyme doesn't know about sret, so the
+        // inner function will still return something. We increase our outer_pos by one,
+        // and once we're done with all other args we will take the return of the inner call and
+        // update the sret pointer with it
+        outer_pos = 1;
+    }
+
+    let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
+    let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
+    let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
+    let enzyme_dupv = cx.create_metadata("enzyme_dupv".to_string()).unwrap();
+    let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
+    let enzyme_dupnoneedv = cx.create_metadata("enzyme_dupnoneedv".to_string()).unwrap();
+
+    while activity_pos < inputs.len() {
+        let diff_activity = inputs[activity_pos as usize];
+        // Duplicated arguments receive a shadow argument, into which enzyme will write the
+        // gradient.
+        let (activity, duplicated): (&Metadata, bool) = match diff_activity {
+            DiffActivity::None => panic!("not a valid input activity"),
+            DiffActivity::Const => (enzyme_const, false),
+            DiffActivity::Active => (enzyme_out, false),
+            DiffActivity::ActiveOnly => (enzyme_out, false),
+            DiffActivity::Dual => (enzyme_dup, true),
+            DiffActivity::Dualv => (enzyme_dupv, true),
+            DiffActivity::DualOnly => (enzyme_dupnoneed, true),
+            DiffActivity::DualvOnly => (enzyme_dupnoneedv, true),
+            DiffActivity::Duplicated => (enzyme_dup, true),
+            DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
+            DiffActivity::FakeActivitySize(_) => (enzyme_const, false),
+        };
+        let outer_arg = outer_args[outer_pos];
+        args.push(cx.get_metadata_value(activity));
+        if matches!(diff_activity, DiffActivity::Dualv) {
+            let next_outer_arg = outer_args[outer_pos + 1];
+            let elem_bytes_size: u64 = match inputs[activity_pos + 1] {
+                DiffActivity::FakeActivitySize(Some(s)) => s.into(),
+                _ => bug!("incorrect Dualv handling recognized."),
+            };
+            // stride: sizeof(T) * n_elems.
+            // n_elems is the next integer.
+            // Now we multiply `elem_bytes_size * next_outer_arg` to get the stride.
+            let mul = unsafe {
+                llvm::LLVMBuildMul(
+                    builder.llbuilder,
+                    cx.get_const_i64(elem_bytes_size),
+                    next_outer_arg,
+                    UNNAMED,
+                )
+            };
+            args.push(mul);
+        }
+        args.push(outer_arg);
+        if duplicated {
+            // We know that duplicated args by construction have a following argument,
+            // so this can not be out of bounds.
+            let next_outer_arg = outer_args[outer_pos + 1];
+            let next_outer_ty = cx.val_ty(next_outer_arg);
+            // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since
+            // vectors behind references (&Vec<T>) are already supported. Users can not pass a
+            // Vec by value for reverse mode, so this would only help forward mode autodiff.
+            let slice = {
+                if activity_pos + 1 >= inputs.len() {
+                    // If there is no arg following our ptr, it also can't be a slice,
+                    // since that would lead to a ptr, int pair.
+                    false
+                } else {
+                    let next_activity = inputs[activity_pos + 1];
+                    // We analyze the MIR types and add this dummy activity if we visit a slice.
+                    matches!(next_activity, DiffActivity::FakeActivitySize(_))
+                }
+            };
+            if slice {
+                // A duplicated slice will have the following two outer_fn arguments:
+                // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call:
+                // (..., metadata! enzyme_dup, ptr, ptr, int1, ...).
+                // FIXME(ZuseZ4): We will upstream a safety check later which asserts that
+                // int2 >= int1, which means the shadow vector is large enough to store the gradient.
+                assert_eq!(cx.type_kind(next_outer_ty), TypeKind::Integer);
+
+                let iterations =
+                    if matches!(diff_activity, DiffActivity::Dualv) { 1 } else { width as usize };
+
+                for i in 0..iterations {
+                    let next_outer_arg2 = outer_args[outer_pos + 2 * (i + 1)];
+                    let next_outer_ty2 = cx.val_ty(next_outer_arg2);
+                    assert_eq!(cx.type_kind(next_outer_ty2), TypeKind::Pointer);
+                    let next_outer_arg3 = outer_args[outer_pos + 2 * (i + 1) + 1];
+                    let next_outer_ty3 = cx.val_ty(next_outer_arg3);
+                    assert_eq!(cx.type_kind(next_outer_ty3), TypeKind::Integer);
+                    args.push(next_outer_arg2);
+                }
+                args.push(cx.get_metadata_value(enzyme_const));
+                args.push(next_outer_arg);
+                outer_pos += 2 + 2 * iterations;
+                activity_pos += 2;
+            } else {
+                // A duplicated pointer will have the following two outer_fn arguments:
+                // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
+                // (..., metadata! enzyme_dup, ptr, ptr, ...).
+                if matches!(diff_activity, DiffActivity::Duplicated | DiffActivity::DuplicatedOnly)
+                {
+                    assert_eq!(cx.type_kind(next_outer_ty), TypeKind::Pointer);
+                }
+                // In the case of Dual we don't have assumptions, e.g. f32 would be valid.
+                args.push(next_outer_arg);
+                outer_pos += 2;
+                activity_pos += 1;
+
+                // Now, if width > 1, we need to account for that
+                for _ in 1..width {
+                    let next_outer_arg = outer_args[outer_pos];
+                    args.push(next_outer_arg);
+                    outer_pos += 1;
+                }
+            }
+        } else {
+            // We do not differentiate with respect to this argument.
+            // We already added the metadata and argument above, so just increase the counters.
+            outer_pos += 1;
+            activity_pos += 1;
+        }
+    }
+}
+
+// On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input
+// arguments. We do however need to declare them with their correct return type.
+// We already figured the correct return type out in our frontend, when generating the outer_fn,
+// so we can now just go ahead and use that. This is not always trivial, e.g. because of sret.
+// Beyond sret, this article describes our challenges nicely:
+// <https://yorickpeterse.com/articles/the-mess-that-is-handling-structure-arguments-and-returns-in-llvm/>
+// I.e. (i32, f32) will get merged into i64, but we don't handle that yet.
+fn compute_enzyme_fn_ty<'ll>(
+    cx: &SimpleCx<'ll>,
+    attrs: &AutoDiffAttrs,
+    fn_to_diff: &'ll Value,
+    outer_fn: &'ll Value,
+) -> &'ll llvm::Type {
+    let fn_ty = cx.get_type_of_global(outer_fn);
+    let mut ret_ty = cx.get_return_type(fn_ty);
+
+    let has_sret = has_sret(outer_fn);
+
+    if has_sret {
+        // Now we don't just forward the return type, so we have to figure it out based on the
+        // primal return type, in combination with the autodiff settings.
+        let fn_ty = cx.get_type_of_global(fn_to_diff);
+        let inner_ret_ty = cx.get_return_type(fn_ty);
+
+        let void_ty = unsafe { llvm::LLVMVoidTypeInContext(cx.llcx) };
+        if inner_ret_ty == void_ty {
+            // This indicates that even the inner function has an sret.
+            // Right now I only look for an sret in the outer function.
+            // This *probably* needs some extra handling, but I never ran
+            // into such a case. So I'll wait for user reports to have a test case.
+            bug!("sret in inner function");
+        }
+
+        if attrs.width == 1 {
+            // Enzyme returns a struct of style:
+            // `{ original_ret(if requested), float, float, ... }`
+            let mut struct_elements = vec![];
+            if attrs.has_primal_ret() {
+                struct_elements.push(inner_ret_ty);
+            }
+            // Next, we push the list of active floats, since they will be lowered to `enzyme_out`,
+            // and therefore part of the return struct.
+            let param_tys = cx.func_params_types(fn_ty);
+            for (act, param_ty) in attrs.input_activity.iter().zip(param_tys) {
+                if matches!(act, DiffActivity::Active) {
+                    // Now find the float type at position i based on the fn_ty,
+                    // to know what (f16/f32/f64/...) to add to the struct.
+                    struct_elements.push(param_ty);
+                }
+            }
+            ret_ty = cx.type_struct(&struct_elements, false);
+        } else {
+            // First we check if we also have to deal with the primal return.
+            match attrs.mode {
+                DiffMode::Forward => match attrs.ret_activity {
+                    DiffActivity::Dual => {
+                        let arr_ty =
+                            unsafe { llvm::LLVMArrayType2(inner_ret_ty, attrs.width as u64 + 1) };
+                        ret_ty = arr_ty;
+                    }
+                    DiffActivity::DualOnly => {
+                        let arr_ty =
+                            unsafe { llvm::LLVMArrayType2(inner_ret_ty, attrs.width as u64) };
+                        ret_ty = arr_ty;
+                    }
+                    DiffActivity::Const => {
+                        todo!("Not sure, do we need to do something here?");
+                    }
+                    _ => {
+                        bug!("unreachable");
+                    }
+                },
+                DiffMode::Reverse => {
+                    todo!("Handle sret for reverse mode");
+                }
+                _ => {
+                    bug!("unreachable");
+                }
+            }
+        }
+    }
+
+    // LLVM can figure out the input types on its own, so we take a shortcut here.
+    unsafe { llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True) }
 }
 
 /// When differentiating `fn_to_diff`, take a `outer_fn` and generate another
@@ -43,9 +295,6 @@ fn generate_enzyme_call<'ll>(
     outer_fn: &'ll Value,
     attrs: AutoDiffAttrs,
 ) {
-    let inputs = attrs.input_activity;
-    let output = attrs.ret_activity;
-
     // We have to pick the name depending on whether we want forward or reverse mode autodiff.
     let mut ad_name: String = match attrs.mode {
         DiffMode::Forward => "__enzyme_fwddiff",
@@ -93,17 +342,9 @@ fn generate_enzyme_call<'ll>(
     // }
     // ```
     unsafe {
-        // On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input
-        // arguments. We do however need to declare them with their correct return type.
-        // We already figured the correct return type out in our frontend, when generating the outer_fn,
-        // so we can now just go ahead and use that. FIXME(ZuseZ4): This doesn't handle sret yet.
-        let fn_ty = llvm::LLVMGlobalGetValueType(outer_fn);
-        let ret_ty = llvm::LLVMGetReturnType(fn_ty);
-
-        // LLVM can figure out the input types on it's own, so we take a shortcut here.
-        let enzyme_ty = llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True);
+        let enzyme_ty = compute_enzyme_fn_ty(cx, &attrs, fn_to_diff, outer_fn);
 
-        //FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and
+        // FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and
         // think a bit more about what should go here.
         let cc = llvm::LLVMGetFunctionCallConv(outer_fn);
         let ad_fn = declare_simple_fn(
@@ -132,116 +373,32 @@ fn generate_enzyme_call<'ll>(
         let mut args = Vec::with_capacity(num_args as usize + 1);
         args.push(fn_to_diff);
 
-        let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
-        let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
-        let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
-        let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
         let enzyme_primal_ret = cx.create_metadata("enzyme_primal_return".to_string()).unwrap();
-
-        match output {
-            DiffActivity::Dual => {
-                args.push(cx.get_metadata_value(enzyme_primal_ret));
-            }
-            DiffActivity::Active => {
-                args.push(cx.get_metadata_value(enzyme_primal_ret));
-            }
-            _ => {}
+        if matches!(attrs.ret_activity, DiffActivity::Dual | DiffActivity::Active) {
+            args.push(cx.get_metadata_value(enzyme_primal_ret));
+        }
+        if attrs.width > 1 {
+            let enzyme_width = cx.create_metadata("enzyme_width".to_string()).unwrap();
+            args.push(cx.get_metadata_value(enzyme_width));
+            args.push(cx.get_const_i64(attrs.width as u64));
         }
 
-        debug!("matching autodiff arguments");
-        // We now handle the issue that Rust level arguments not always match the llvm-ir level
-        // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
-        // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
-        // need to match those.
-        // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
-        // using iterators and peek()?
-        let mut outer_pos: usize = 0;
-        let mut activity_pos = 0;
+        let has_sret = has_sret(outer_fn);
         let outer_args: Vec<&llvm::Value> = get_params(outer_fn);
-        while activity_pos < inputs.len() {
-            let diff_activity = inputs[activity_pos as usize];
-            // Duplicated arguments received a shadow argument, into which enzyme will write the
-            // gradient.
-            let (activity, duplicated): (&Metadata, bool) = match diff_activity {
-                DiffActivity::None => panic!("not a valid input activity"),
-                DiffActivity::Const => (enzyme_const, false),
-                DiffActivity::Active => (enzyme_out, false),
-                DiffActivity::ActiveOnly => (enzyme_out, false),
-                DiffActivity::Dual => (enzyme_dup, true),
-                DiffActivity::DualOnly => (enzyme_dupnoneed, true),
-                DiffActivity::Duplicated => (enzyme_dup, true),
-                DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
-                DiffActivity::FakeActivitySize => (enzyme_const, false),
-            };
-            let outer_arg = outer_args[outer_pos];
-            args.push(cx.get_metadata_value(activity));
-            args.push(outer_arg);
-            if duplicated {
-                // We know that duplicated args by construction have a following argument,
-                // so this can not be out of bounds.
-                let next_outer_arg = outer_args[outer_pos + 1];
-                let next_outer_ty = cx.val_ty(next_outer_arg);
-                // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since
-                // vectors behind references (&Vec<T>) are already supported. Users can not pass a
-                // Vec by value for reverse mode, so this would only help forward mode autodiff.
-                let slice = {
-                    if activity_pos + 1 >= inputs.len() {
-                        // If there is no arg following our ptr, it also can't be a slice,
-                        // since that would lead to a ptr, int pair.
-                        false
-                    } else {
-                        let next_activity = inputs[activity_pos + 1];
-                        // We analyze the MIR types and add this dummy activity if we visit a slice.
-                        next_activity == DiffActivity::FakeActivitySize
-                    }
-                };
-                if slice {
-                    // A duplicated slice will have the following two outer_fn arguments:
-                    // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call:
-                    // (..., metadata! enzyme_dup, ptr, ptr, int1, ...).
-                    // FIXME(ZuseZ4): We will upstream a safety check later which asserts that
-                    // int2 >= int1, which means the shadow vector is large enough to store the gradient.
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer);
-                    let next_outer_arg2 = outer_args[outer_pos + 2];
-                    let next_outer_ty2 = cx.val_ty(next_outer_arg2);
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer);
-                    let next_outer_arg3 = outer_args[outer_pos + 3];
-                    let next_outer_ty3 = cx.val_ty(next_outer_arg3);
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer);
-                    args.push(next_outer_arg2);
-                    args.push(cx.get_metadata_value(enzyme_const));
-                    args.push(next_outer_arg);
-                    outer_pos += 4;
-                    activity_pos += 2;
-                } else {
-                    // A duplicated pointer will have the following two outer_fn arguments:
-                    // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
-                    // (..., metadata! enzyme_dup, ptr, ptr, ...).
-                    if matches!(
-                        diff_activity,
-                        DiffActivity::Duplicated | DiffActivity::DuplicatedOnly
-                    ) {
-                        assert!(
-                            llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer
-                        );
-                    }
-                    // In the case of Dual we don't have assumptions, e.g. f32 would be valid.
-                    args.push(next_outer_arg);
-                    outer_pos += 2;
-                    activity_pos += 1;
-                }
-            } else {
-                // We do not differentiate with resprect to this argument.
-                // We already added the metadata and argument above, so just increase the counters.
-                outer_pos += 1;
-                activity_pos += 1;
-            }
-        }
+        match_args_from_caller_to_enzyme(
+            &cx,
+            &builder,
+            attrs.width,
+            &mut args,
+            &attrs.input_activity,
+            &outer_args,
+            has_sret,
+        );
 
         let call = builder.call(enzyme_ty, ad_fn, &args, None);
 
         // This part is a bit iffy. LLVM requires that a call to an inlineable function has some
-        // metadata attachted to it, but we just created this code oota. Given that the
+        // metadata attached to it, but we just created this code oota. Given that the
         // differentiated function already has partly confusing metadata, and given that this
         // affects nothing but the auttodiff IR, we take a shortcut and just steal metadata from the
         // dummy code which we inserted at a higher level.
@@ -262,7 +419,26 @@ fn generate_enzyme_call<'ll>(
         // Now that we copied the metadata, get rid of dummy code.
         llvm::LLVMRustEraseInstUntilInclusive(entry, last_inst);
 
-        if cx.val_ty(call) == cx.type_void() {
+        if cx.val_ty(call) == cx.type_void() || has_sret {
+            if has_sret {
+                // This is what we already have in our outer_fn (shortened):
+                // define void @_foo(ptr <..> sret([32 x i8]) initializes((0, 32)) %0, <...>) {
+                //   %7 = call [4 x double] (...) @__enzyme_fwddiff_foo(ptr @square, metadata !"enzyme_width", i64 4, <...>)
+                //   <Here we are, we want to add the following two lines>
+                //   store [4 x double] %7, ptr %0, align 8
+                //   ret void
+                // }
+
+                // now store the result of the enzyme call into the sret pointer.
+                let sret_ptr = outer_args[0];
+                let call_ty = cx.val_ty(call);
+                if attrs.width == 1 {
+                    assert_eq!(cx.type_kind(call_ty), TypeKind::Struct);
+                } else {
+                    assert_eq!(cx.type_kind(call_ty), TypeKind::Array);
+                }
+                llvm::LLVMBuildStore(&builder.llbuilder, call, sret_ptr);
+            }
             builder.ret_void();
         } else {
             builder.ret(call);
@@ -294,8 +470,7 @@ pub(crate) fn differentiate<'ll>(
     if !diff_items.is_empty()
         && !cgcx.opts.unstable_opts.autodiff.contains(&rustc_session::config::AutoDiff::Enable)
     {
-        let dcx = cgcx.create_dcx();
-        return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutEnable));
+        return Err(diag_handler.handle().emit_almost_fatal(AutoDiffWithoutEnable));
     }
 
     // Before dumping the module, we want all the TypeTrees to become part of the module.
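
To make the argument matching in `match_args_from_caller_to_enzyme` more concrete, here is a standalone sketch of how one list of Rust-level activities maps onto a different number of LLVM-level arguments once slices are lowered to (pointer, length) pairs and duplicated arguments gain a shadow. The `Activity` enum below is a hypothetical simplification (width = 1, no `Dualv`), not rustc's `DiffActivity`:

// Sketch only: counting how many outer (LLVM-level) arguments an activity list consumes.
#[derive(Clone, Copy, PartialEq)]
enum Activity {
    Const,            // not differentiated: a single outer argument
    Duplicated,       // pointer with a shadow: primal ptr + shadow ptr
    FakeActivitySize, // dummy entry marking the length half of a slice
}

fn outer_args_consumed(activities: &[Activity]) -> usize {
    let mut consumed = 0;
    let mut i = 0;
    while i < activities.len() {
        match activities[i] {
            // Duplicated slice: (ptr1, len1) in the primal plus the shadow (ptr2, len2).
            Activity::Duplicated
                if activities.get(i + 1) == Some(&Activity::FakeActivitySize) =>
            {
                consumed += 4;
                i += 2;
            }
            // Duplicated pointer: primal ptr + shadow ptr.
            Activity::Duplicated => {
                consumed += 2;
                i += 1;
            }
            // Const and similar: one argument, no shadow.
            _ => {
                consumed += 1;
                i += 1;
            }
        }
    }
    consumed
}

fn main() {
    use Activity::*;
    // e.g. `fn f(x: &[f32], y: f32)` with activities [Duplicated, FakeActivitySize, Const]
    // consumes five outer arguments: data ptr, len, shadow ptr, shadow len, y.
    assert_eq!(outer_args_consumed(&[Duplicated, FakeActivitySize, Const]), 5);
}
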
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index 457e5452ce9..a6f277e4455 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -4,8 +4,8 @@ use std::borrow::Borrow;
 
 use libc::{c_char, c_uint};
 use rustc_abi as abi;
+use rustc_abi::HasDataLayout;
 use rustc_abi::Primitive::Pointer;
-use rustc_abi::{AddressSpace, HasDataLayout};
 use rustc_ast::Mutability;
 use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::traits::*;
@@ -269,7 +269,8 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
             }
             Scalar::Ptr(ptr, _size) => {
                 let (prov, offset) = ptr.into_parts();
-                let (base_addr, base_addr_space) = match self.tcx.global_alloc(prov.alloc_id()) {
+                let global_alloc = self.tcx.global_alloc(prov.alloc_id());
+                let base_addr = match global_alloc {
                     GlobalAlloc::Memory(alloc) => {
                         // For ZSTs directly codegen an aligned pointer.
                         // This avoids generating a zero-sized constant value and actually needing a
@@ -301,12 +302,10 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
                                     format!("alloc_{hash:032x}").as_bytes(),
                                 );
                             }
-                            (value, AddressSpace::DATA)
+                            value
                         }
                     }
-                    GlobalAlloc::Function { instance, .. } => {
-                        (self.get_fn_addr(instance), self.data_layout().instruction_address_space)
-                    }
+                    GlobalAlloc::Function { instance, .. } => self.get_fn_addr(instance),
                     GlobalAlloc::VTable(ty, dyn_ty) => {
                         let alloc = self
                             .tcx
@@ -319,14 +318,15 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
                             .unwrap_memory();
                         let init = const_alloc_to_llvm(self, alloc, /*static*/ false);
                         let value = self.static_addr_of_impl(init, alloc.inner().align, None);
-                        (value, AddressSpace::DATA)
+                        value
                     }
                     GlobalAlloc::Static(def_id) => {
                         assert!(self.tcx.is_static(def_id));
                         assert!(!self.tcx.is_thread_local_static(def_id));
-                        (self.get_static(def_id), AddressSpace::DATA)
+                        self.get_static(def_id)
                     }
                 };
+                let base_addr_space = global_alloc.address_space(self);
                 let llval = unsafe {
                     llvm::LLVMConstInBoundsGEP2(
                         self.type_i8(),
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index 62fa2884e0f..bf81eb648f8 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -129,7 +129,12 @@ pub(crate) fn const_alloc_to_llvm<'ll>(
         append_chunks_of_init_and_uninit_bytes(&mut llvals, cx, alloc, range);
     }
 
-    cx.const_struct(&llvals, true)
+    // Avoid wrapping in a struct if there is only a single value. This ensures
+    // that LLVM is able to perform the string merging optimization if the constant
+    // is a valid C string. LLVM only considers bare arrays for this optimization,
+    // not arrays wrapped in a struct. LLVM handles this at:
+    // https://github.com/rust-lang/llvm-project/blob/acaea3d2bb8f351b740db7ebce7d7a40b9e21488/llvm/lib/Target/TargetLoweringObjectFile.cpp#L249-L280
+    if let &[data] = &*llvals { data } else { cx.const_struct(&llvals, true) }
 }
 
 fn codegen_static_initializer<'ll, 'tcx>(
@@ -425,7 +430,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             let val_llty = self.val_ty(v);
 
             let g = self.get_static_inner(def_id, val_llty);
-            let llty = llvm::LLVMGlobalGetValueType(g);
+            let llty = self.get_type_of_global(g);
 
             let g = if val_llty == llty {
                 g
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 926445c780b..4ec69995518 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -8,6 +8,7 @@ use std::str;
 use rustc_abi::{HasDataLayout, Size, TargetDataLayout, VariantIdx};
 use rustc_codegen_ssa::back::versioned_llvm_target;
 use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh};
+use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::errors as ssa_errors;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::base_n::{ALPHANUMERIC_ONLY, ToBaseN};
@@ -38,7 +39,7 @@ use crate::debuginfo::metadata::apply_vcall_visibility_metadata;
 use crate::llvm::Metadata;
 use crate::type_::Type;
 use crate::value::Value;
-use crate::{attributes, coverageinfo, debuginfo, llvm, llvm_util};
+use crate::{attributes, common, coverageinfo, debuginfo, llvm, llvm_util};
 
 /// `TyCtxt` (and related cache datastructures) can't be move between threads.
 /// However, there are various cx related functions which we want to be available to the builder and
@@ -163,23 +164,6 @@ pub(crate) unsafe fn create_module<'ll>(
     let mut target_data_layout = sess.target.data_layout.to_string();
     let llvm_version = llvm_util::get_version();
 
-    if llvm_version < (19, 0, 0) {
-        if sess.target.arch == "aarch64" || sess.target.arch.starts_with("arm64") {
-            // LLVM 19 sets -Fn32 in its data layout string for 64-bit ARM
-            // Earlier LLVMs leave this default, so remove it.
-            // See https://github.com/llvm/llvm-project/pull/90702
-            target_data_layout = target_data_layout.replace("-Fn32", "");
-        }
-    }
-
-    if llvm_version < (19, 0, 0) {
-        if sess.target.arch == "loongarch64" {
-            // LLVM 19 updates the LoongArch64 data layout.
-            // See https://github.com/llvm/llvm-project/pull/93814
-            target_data_layout = target_data_layout.replace("-n32:64", "-n64");
-        }
-    }
-
     if llvm_version < (20, 0, 0) {
         if sess.target.arch == "aarch64" || sess.target.arch.starts_with("arm64") {
             // LLVM 20 defines three additional address spaces for alternate
@@ -327,6 +311,22 @@ pub(crate) unsafe fn create_module<'ll>(
                 pfe.prefix().into(),
             );
         }
+
+        // Add "kcfi-arity" module flag if KCFI arity indicator is enabled. (See
+        // https://github.com/llvm/llvm-project/pull/117121.)
+        if sess.is_sanitizer_kcfi_arity_enabled() {
+            // KCFI arity indicator requires LLVM 21.0.0 or later.
+            if llvm_version < (21, 0, 0) {
+                tcx.dcx().emit_err(crate::errors::SanitizerKcfiArityRequiresLLVM2100);
+            }
+
+            llvm::add_module_flag_u32(
+                llmod,
+                llvm::ModuleFlagMergeBehavior::Override,
+                "kcfi-arity",
+                1,
+            );
+        }
     }
 
     // Control Flow Guard is currently only supported by MSVC and LLVM on Windows.
@@ -643,7 +643,18 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
         llvm::set_section(g, c"llvm.metadata");
     }
 }
-
+impl<'ll> SimpleCx<'ll> {
+    pub(crate) fn get_return_type(&self, ty: &'ll Type) -> &'ll Type {
+        assert_eq!(self.type_kind(ty), TypeKind::Function);
+        unsafe { llvm::LLVMGetReturnType(ty) }
+    }
+    pub(crate) fn get_type_of_global(&self, val: &'ll Value) -> &'ll Type {
+        unsafe { llvm::LLVMGlobalGetValueType(val) }
+    }
+    pub(crate) fn val_ty(&self, v: &'ll Value) -> &'ll Type {
+        common::val_ty(v)
+    }
+}
 impl<'ll> SimpleCx<'ll> {
     pub(crate) fn new(
         llmod: &'ll llvm::Module,
@@ -660,6 +671,13 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
         llvm::LLVMMetadataAsValue(self.llcx(), metadata)
     }
 
+    // FIXME(autodiff): We should split `ConstCodegenMethods` to pull the reusable parts
+    // onto a trait that is also implemented for GenericCx.
+    pub(crate) fn get_const_i64(&self, n: u64) -> &'ll Value {
+        let ty = unsafe { llvm::LLVMInt64TypeInContext(self.llcx()) };
+        unsafe { llvm::LLVMConstInt(ty, n, llvm::False) }
+    }
+
     pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> {
         let name = SmallCStr::new(name);
         unsafe { llvm::LLVMGetNamedFunction((**self).borrow().llmod, name.as_ptr()) }
@@ -1127,6 +1145,18 @@ impl<'ll> CodegenCx<'ll, '_> {
         ifn!("llvm.usub.sat.i64", fn(t_i64, t_i64) -> t_i64);
         ifn!("llvm.usub.sat.i128", fn(t_i128, t_i128) -> t_i128);
 
+        ifn!("llvm.scmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
+        ifn!("llvm.scmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
+        ifn!("llvm.scmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
+        ifn!("llvm.scmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
+        ifn!("llvm.scmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
+
+        ifn!("llvm.ucmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
+        ifn!("llvm.ucmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
+        ifn!("llvm.ucmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
+        ifn!("llvm.ucmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
+        ifn!("llvm.ucmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
+
         ifn!("llvm.lifetime.start.p0i8", fn(t_i64, ptr) -> void);
         ifn!("llvm.lifetime.end.p0i8", fn(t_i64, ptr) -> void);
 
@@ -1171,10 +1201,8 @@ impl<'ll> CodegenCx<'ll, '_> {
 
         if self.sess().instrument_coverage() {
             ifn!("llvm.instrprof.increment", fn(ptr, t_i64, t_i32, t_i32) -> void);
-            if crate::llvm_util::get_version() >= (19, 0, 0) {
-                ifn!("llvm.instrprof.mcdc.parameters", fn(ptr, t_i64, t_i32) -> void);
-                ifn!("llvm.instrprof.mcdc.tvbitmap.update", fn(ptr, t_i64, t_i32, ptr) -> void);
-            }
+            ifn!("llvm.instrprof.mcdc.parameters", fn(ptr, t_i64, t_i32) -> void);
+            ifn!("llvm.instrprof.mcdc.tvbitmap.update", fn(ptr, t_i64, t_i32, ptr) -> void);
         }
 
         ifn!("llvm.type.test", fn(ptr, t_metadata) -> i1);
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
index b617f4d37f5..f6000e72840 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
@@ -146,6 +146,7 @@ pub(crate) struct CoverageSpan {
 #[derive(Clone, Debug, Default)]
 pub(crate) struct Regions {
     pub(crate) code_regions: Vec<CodeRegion>,
+    pub(crate) expansion_regions: Vec<ExpansionRegion>,
     pub(crate) branch_regions: Vec<BranchRegion>,
     pub(crate) mcdc_branch_regions: Vec<MCDCBranchRegion>,
     pub(crate) mcdc_decision_regions: Vec<MCDCDecisionRegion>,
@@ -154,10 +155,16 @@ pub(crate) struct Regions {
 impl Regions {
     /// Returns true if none of this structure's tables contain any regions.
     pub(crate) fn has_no_regions(&self) -> bool {
-        let Self { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-            self;
+        let Self {
+            code_regions,
+            expansion_regions,
+            branch_regions,
+            mcdc_branch_regions,
+            mcdc_decision_regions,
+        } = self;
 
         code_regions.is_empty()
+            && expansion_regions.is_empty()
             && branch_regions.is_empty()
             && mcdc_branch_regions.is_empty()
             && mcdc_decision_regions.is_empty()
@@ -172,6 +179,14 @@ pub(crate) struct CodeRegion {
     pub(crate) counter: Counter,
 }
 
+/// Must match the layout of `LLVMRustCoverageExpansionRegion`.
+#[derive(Clone, Debug)]
+#[repr(C)]
+pub(crate) struct ExpansionRegion {
+    pub(crate) cov_span: CoverageSpan,
+    pub(crate) expanded_file_id: u32,
+}
+
 /// Must match the layout of `LLVMRustCoverageBranchRegion`.
 #[derive(Clone, Debug)]
 #[repr(C)]
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
index 2cd7fa3225a..907d6d41a1f 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
@@ -63,8 +63,18 @@ pub(crate) fn write_function_mappings_to_buffer(
     expressions: &[ffi::CounterExpression],
     regions: &ffi::Regions,
 ) -> Vec<u8> {
-    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-        regions;
+    let ffi::Regions {
+        code_regions,
+        expansion_regions,
+        branch_regions,
+        mcdc_branch_regions,
+        mcdc_decision_regions,
+    } = regions;
+
+    // SAFETY:
+    // - All types are FFI-compatible and have matching representations in Rust/C++.
+    // - For pointer/length pairs, the pointer and length come from the same vector or slice.
+    // - C++ code does not retain any pointers after the call returns.
     llvm::build_byte_buffer(|buffer| unsafe {
         llvm::LLVMRustCoverageWriteFunctionMappingsToBuffer(
             virtual_file_mapping.as_ptr(),
@@ -73,6 +83,8 @@ pub(crate) fn write_function_mappings_to_buffer(
             expressions.len(),
             code_regions.as_ptr(),
             code_regions.len(),
+            expansion_regions.as_ptr(),
+            expansion_regions.len(),
             branch_regions.as_ptr(),
             branch_regions.len(),
             mcdc_branch_regions.as_ptr(),
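The SAFETY comment above relies on a simple convention for every pointer/length pair passed across this FFI boundary: both values come from the same live slice, and the C++ side does not keep the pointer after the call returns. A minimal sketch of that pattern, using a hypothetical `ffi_sum` import rather than the real coverage writer:

extern "C" {
    fn ffi_sum(values: *const u64, len: usize) -> u64;
}

fn sum_via_ffi(values: &[u64]) -> u64 {
    // SAFETY: the pointer and length describe the same slice, which stays alive
    // for the duration of the call, and the callee does not retain the pointer.
    unsafe { ffi_sum(values.as_ptr(), values.len()) }
}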
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index 9a2473d6cf2..55b1e728b70 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -5,15 +5,11 @@ use rustc_abi::Align;
 use rustc_codegen_ssa::traits::{
     BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
 };
-use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
-use rustc_hir::def_id::{DefId, LocalDefId};
+use rustc_data_structures::fx::FxIndexMap;
 use rustc_index::IndexVec;
-use rustc_middle::mir;
-use rustc_middle::mir::mono::MonoItemPartitions;
-use rustc_middle::ty::{self, TyCtxt};
+use rustc_middle::ty::TyCtxt;
 use rustc_session::RemapFileNameExt;
 use rustc_session::config::RemapPathScopeComponents;
-use rustc_span::def_id::DefIdSet;
 use rustc_span::{SourceFile, StableSourceFileId};
 use tracing::debug;
 
@@ -24,6 +20,7 @@ use crate::llvm;
 
 mod covfun;
 mod spans;
+mod unused;
 
 /// Generates and exports the coverage map, which is embedded in special
 /// linker sections in the final binary.
@@ -56,13 +53,6 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
         None => return,
     };
 
-    // The order of entries in this global file table needs to be deterministic,
-    // and ideally should also be independent of the details of stable-hashing,
-    // because coverage tests snapshots (`.cov-map`) can observe the order and
-    // would need to be re-blessed if it changes. As long as those requirements
-    // are satisfied, the order can be arbitrary.
-    let mut global_file_table = GlobalFileTable::new();
-
     let mut covfun_records = instances_used
         .iter()
         .copied()
@@ -70,18 +60,13 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
         // order that doesn't depend on the stable-hash-based order in which
         // instances were visited during codegen.
         .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name)
-        .filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true))
+        .filter_map(|instance| prepare_covfun_record(tcx, instance, true))
         .collect::<Vec<_>>();
 
     // In a single designated CGU, also prepare covfun records for functions
     // in this crate that were instrumented for coverage, but are unused.
     if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
-        let mut unused_instances = gather_unused_function_instances(cx);
-        // Sort the unused instances by symbol name, for the same reason as the used ones.
-        unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name);
-        covfun_records.extend(unused_instances.into_iter().filter_map(|instance| {
-            prepare_covfun_record(tcx, &mut global_file_table, instance, false)
-        }));
+        unused::prepare_covfun_records_for_unused_functions(cx, &mut covfun_records);
     }
 
     // If there are no covfun records for this CGU, don't generate a covmap record.
@@ -93,91 +78,88 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
         return;
     }
 
-    // Encode all filenames referenced by coverage mappings in this CGU.
-    let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
-    // The `llvm-cov` tool uses this hash to associate each covfun record with
-    // its corresponding filenames table, since the final binary will typically
-    // contain multiple covmap records from different compilation units.
-    let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
-
-    let mut unused_function_names = vec![];
+    // Prepare the global file table for this CGU, containing all paths needed
+    // by one or more covfun records.
+    let global_file_table =
+        GlobalFileTable::build(tcx, covfun_records.iter().flat_map(|c| c.all_source_files()));
 
     for covfun in &covfun_records {
-        unused_function_names.extend(covfun.mangled_function_name_if_unused());
-
-        covfun::generate_covfun_record(cx, filenames_hash, covfun)
-    }
-
-    // For unused functions, we need to take their mangled names and store them
-    // in a specially-named global array. LLVM's `InstrProfiling` pass will
-    // detect this global and include those names in its `__llvm_prf_names`
-    // section. (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.)
-    if !unused_function_names.is_empty() {
-        assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
-
-        let name_globals = unused_function_names
-            .into_iter()
-            .map(|mangled_function_name| cx.const_str(mangled_function_name).0)
-            .collect::<Vec<_>>();
-        let initializer = cx.const_array(cx.type_ptr(), &name_globals);
-
-        let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names");
-        llvm::set_global_constant(array, true);
-        llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
-        llvm::set_initializer(array, initializer);
+        covfun::generate_covfun_record(cx, &global_file_table, covfun)
     }
 
     // Generate the coverage map header, which contains the filenames used by
     // this CGU's coverage mappings, and store it in a well-known global.
     // (This is skipped if we returned early due to having no covfun records.)
-    generate_covmap_record(cx, covmap_version, &filenames_buffer);
+    generate_covmap_record(cx, covmap_version, &global_file_table.filenames_buffer);
 }
 
-/// Maps "global" (per-CGU) file ID numbers to their underlying source files.
+/// Maps "global" (per-CGU) file ID numbers to their underlying source file paths.
+#[derive(Debug)]
 struct GlobalFileTable {
     /// This "raw" table doesn't include the working dir, so a file's
     /// global ID is its index in this set **plus one**.
-    raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>,
+    raw_file_table: FxIndexMap<StableSourceFileId, String>,
+
+    /// The file table in encoded form (possibly compressed), which can be
+    /// included directly in this CGU's `__llvm_covmap` record.
+    filenames_buffer: Vec<u8>,
+
+    /// Truncated hash of the bytes in `filenames_buffer`.
+    ///
+    /// The `llvm-cov` tool uses this hash to associate each covfun record with
+    /// its corresponding filenames table, since the final binary will typically
+    /// contain multiple covmap records from different compilation units.
+    filenames_hash: u64,
 }
 
 impl GlobalFileTable {
-    fn new() -> Self {
-        Self { raw_file_table: FxIndexMap::default() }
-    }
+    /// Builds a "global file table" for this CGU, mapping numeric IDs to
+    /// path strings.
+    fn build<'a>(tcx: TyCtxt<'_>, all_files: impl Iterator<Item = &'a SourceFile>) -> Self {
+        let mut raw_file_table = FxIndexMap::default();
+
+        for file in all_files {
+            raw_file_table.entry(file.stable_id).or_insert_with(|| {
+                file.name
+                    .for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
+                    .to_string_lossy()
+                    .into_owned()
+            });
+        }
 
-    fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId {
-        // Ensure the given file has a table entry, and get its index.
-        let entry = self.raw_file_table.entry(file.stable_id);
-        let raw_id = entry.index();
-        entry.or_insert_with(|| Arc::clone(file));
+        // FIXME(Zalathar): Consider sorting the file table here, but maybe
+        // only after adding filename support to coverage-dump, so that the
+        // table order isn't directly visible in `.cov-map` snapshots.
 
-        // The raw file table doesn't include an entry for the working dir
-        // (which has ID 0), so add 1 to get the correct ID.
-        GlobalFileId::from_usize(raw_id + 1)
-    }
+        let mut table = Vec::with_capacity(raw_file_table.len() + 1);
 
-    fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
-        let mut table = Vec::with_capacity(self.raw_file_table.len() + 1);
-
-        // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
-        // requires setting the first filename to the compilation directory.
-        // Since rustc generates coverage maps with relative paths, the
-        // compilation directory can be combined with the relative paths
-        // to get absolute paths, if needed.
-        table.push(
-            tcx.sess
-                .opts
-                .working_dir
-                .for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
-                .to_string_lossy(),
-        );
+        // Since version 6 of the LLVM coverage mapping format, the first entry
+        // in the global file table is treated as a base directory, used to
+        // resolve any other entries that are stored as relative paths.
+        let base_dir = tcx
+            .sess
+            .opts
+            .working_dir
+            .for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
+            .to_string_lossy();
+        table.push(base_dir.as_ref());
 
         // Add the regular entries after the base directory.
-        table.extend(self.raw_file_table.values().map(|file| {
-            file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
-        }));
+        table.extend(raw_file_table.values().map(|name| name.as_str()));
 
-        llvm_cov::write_filenames_to_buffer(&table)
+        // Encode the file table into a buffer, and get the hash of its encoded
+        // bytes, so that we can embed that hash in `__llvm_covfun` records.
+        let filenames_buffer = llvm_cov::write_filenames_to_buffer(&table);
+        let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
+
+        Self { raw_file_table, filenames_buffer, filenames_hash }
+    }
+
+    fn get_existing_id(&self, file: &SourceFile) -> Option<GlobalFileId> {
+        let raw_id = self.raw_file_table.get_index_of(&file.stable_id)?;
+        // The raw file table doesn't include an entry for the base dir
+        // (which has ID 0), so add 1 to get the correct ID.
+        Some(GlobalFileId::from_usize(raw_id + 1))
     }
 }
 
@@ -193,26 +175,31 @@ rustc_index::newtype_index! {
     struct LocalFileId {}
 }
 
-/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
-/// file IDs.
+/// Holds a mapping from "local" (per-function) file IDs to their corresponding
+/// source files.
 #[derive(Debug, Default)]
 struct VirtualFileMapping {
-    local_to_global: IndexVec<LocalFileId, GlobalFileId>,
-    global_to_local: FxIndexMap<GlobalFileId, LocalFileId>,
+    local_file_table: IndexVec<LocalFileId, Arc<SourceFile>>,
 }
 
 impl VirtualFileMapping {
-    fn local_id_for_global(&mut self, global_file_id: GlobalFileId) -> LocalFileId {
-        *self
-            .global_to_local
-            .entry(global_file_id)
-            .or_insert_with(|| self.local_to_global.push(global_file_id))
+    fn push_file(&mut self, source_file: &Arc<SourceFile>) -> LocalFileId {
+        self.local_file_table.push(Arc::clone(source_file))
     }
 
-    fn to_vec(&self) -> Vec<u32> {
-        // This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`,
-        // but it isn't hot or expensive enough to justify the extra unsafety.
-        self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect()
+    /// Resolves all of the filenames in this local file mapping to a list of
+    /// global file IDs in its CGU, for inclusion in this function's
+    /// `__llvm_covfun` record.
+    ///
+    /// The global file IDs are returned as `u32` to make FFI easier.
+    fn resolve_all(&self, global_file_table: &GlobalFileTable) -> Option<Vec<u32>> {
+        self.local_file_table
+            .iter()
+            .map(|file| try {
+                let id = global_file_table.get_existing_id(file)?;
+                GlobalFileId::as_u32(id)
+            })
+            .collect::<Option<Vec<_>>>()
     }
 }
 
@@ -249,121 +236,3 @@ fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_
 
     cx.add_used_global(covmap_global);
 }
-
-/// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
-/// But since we don't want unused functions to disappear from coverage reports, we also scan for
-/// functions that were instrumented but are not participating in codegen.
-///
-/// These unused functions don't need to be codegenned, but we do need to add them to the function
-/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
-/// We also end up adding their symbol names to a special global array that LLVM will include in
-/// its embedded coverage data.
-fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<ty::Instance<'tcx>> {
-    assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
-
-    let tcx = cx.tcx;
-    let usage = prepare_usage_sets(tcx);
-
-    let is_unused_fn = |def_id: LocalDefId| -> bool {
-        // Usage sets expect `DefId`, so convert from `LocalDefId`.
-        let d: DefId = LocalDefId::to_def_id(def_id);
-        // To be potentially eligible for "unused function" mappings, a definition must:
-        // - Be eligible for coverage instrumentation
-        // - Not participate directly in codegen (or have lost all its coverage statements)
-        // - Not have any coverage statements inlined into codegenned functions
-        tcx.is_eligible_for_coverage(def_id)
-            && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
-            && !usage.used_via_inlining.contains(&d)
-    };
-
-    // FIXME(#79651): Consider trying to filter out dummy instantiations of
-    // unused generic functions from library crates, because they can produce
-    // "unused instantiation" in coverage reports even when they are actually
-    // used by some downstream crate in the same binary.
-
-    tcx.mir_keys(())
-        .iter()
-        .copied()
-        .filter(|&def_id| is_unused_fn(def_id))
-        .map(|def_id| make_dummy_instance(tcx, def_id))
-        .collect::<Vec<_>>()
-}
-
-struct UsageSets<'tcx> {
-    all_mono_items: &'tcx DefIdSet,
-    used_via_inlining: FxHashSet<DefId>,
-    missing_own_coverage: FxHashSet<DefId>,
-}
-
-/// Prepare sets of definitions that are relevant to deciding whether something
-/// is an "unused function" for coverage purposes.
-fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
-    let MonoItemPartitions { all_mono_items, codegen_units, .. } =
-        tcx.collect_and_partition_mono_items(());
-
-    // Obtain a MIR body for each function participating in codegen, via an
-    // arbitrary instance.
-    let mut def_ids_seen = FxHashSet::default();
-    let def_and_mir_for_all_mono_fns = codegen_units
-        .iter()
-        .flat_map(|cgu| cgu.items().keys())
-        .filter_map(|item| match item {
-            mir::mono::MonoItem::Fn(instance) => Some(instance),
-            mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None,
-        })
-        // We only need one arbitrary instance per definition.
-        .filter(move |instance| def_ids_seen.insert(instance.def_id()))
-        .map(|instance| {
-            // We don't care about the instance, just its underlying MIR.
-            let body = tcx.instance_mir(instance.def);
-            (instance.def_id(), body)
-        });
-
-    // Functions whose coverage statements were found inlined into other functions.
-    let mut used_via_inlining = FxHashSet::default();
-    // Functions that were instrumented, but had all of their coverage statements
-    // removed by later MIR transforms (e.g. UnreachablePropagation).
-    let mut missing_own_coverage = FxHashSet::default();
-
-    for (def_id, body) in def_and_mir_for_all_mono_fns {
-        let mut saw_own_coverage = false;
-
-        // Inspect every coverage statement in the function's MIR.
-        for stmt in body
-            .basic_blocks
-            .iter()
-            .flat_map(|block| &block.statements)
-            .filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_)))
-        {
-            if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) {
-                // This coverage statement was inlined from another function.
-                used_via_inlining.insert(inlined.def_id());
-            } else {
-                // Non-inlined coverage statements belong to the enclosing function.
-                saw_own_coverage = true;
-            }
-        }
-
-        if !saw_own_coverage && body.function_coverage_info.is_some() {
-            missing_own_coverage.insert(def_id);
-        }
-    }
-
-    UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
-}
-
-fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
-    let def_id = local_def_id.to_def_id();
-
-    // Make a dummy instance that fills in all generics with placeholders.
-    ty::Instance::new(
-        def_id,
-        ty::GenericArgs::for_item(tcx, def_id, |param, _| {
-            if let ty::GenericParamDefKind::Lifetime = param.kind {
-                tcx.lifetimes.re_erased.into()
-            } else {
-                tcx.mk_param_from_def(param)
-            }
-        }),
-    )
-}
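The numbering convention shared by `GlobalFileTable` and `VirtualFileMapping` above is: the base directory implicitly occupies global file ID 0, each file's global ID is its index in the raw table plus one, and `resolve_all` translates a function's local file table into those global IDs. A minimal sketch with plain strings standing in for `SourceFile`s:

fn main() {
    // Per-CGU table (deduplicated); the base directory is implicit at global ID 0.
    let raw_file_table = vec!["src/lib.rs", "src/macros.rs"];
    // Per-function table, in the order files were first pushed.
    let local_file_table = vec!["src/macros.rs", "src/lib.rs"];

    // Equivalent of `resolve_all`: map each local file to its global ID, as u32.
    let resolved: Option<Vec<u32>> = local_file_table
        .iter()
        .map(|file| {
            let raw_id = raw_file_table.iter().position(|f| f == file)?;
            Some((raw_id + 1) as u32)
        })
        .collect();

    // Local file 0 -> global ID 2, local file 1 -> global ID 1.
    assert_eq!(resolved, Some(vec![2, 1]));
}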
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
index 80e54bf045e..7bdbc685952 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -5,6 +5,7 @@
 //! [^win]: On Windows the section name is `.lcovfun`.
 
 use std::ffi::CString;
+use std::sync::Arc;
 
 use rustc_abi::Align;
 use rustc_codegen_ssa::traits::{
@@ -15,7 +16,7 @@ use rustc_middle::mir::coverage::{
     MappingKind, Op,
 };
 use rustc_middle::ty::{Instance, TyCtxt};
-use rustc_span::Span;
+use rustc_span::{SourceFile, Span};
 use rustc_target::spec::HasTargetSpec;
 use tracing::debug;
 
@@ -38,16 +39,15 @@ pub(crate) struct CovfunRecord<'tcx> {
 }
 
 impl<'tcx> CovfunRecord<'tcx> {
-    /// FIXME(Zalathar): Make this the responsibility of the code that determines
-    /// which functions are unused.
-    pub(crate) fn mangled_function_name_if_unused(&self) -> Option<&'tcx str> {
-        (!self.is_used).then_some(self.mangled_function_name)
+    /// Iterator that yields all source files referred to by this function's
+    /// coverage mappings. Used to build the global file table for the CGU.
+    pub(crate) fn all_source_files(&self) -> impl Iterator<Item = &SourceFile> {
+        self.virtual_file_mapping.local_file_table.iter().map(Arc::as_ref)
     }
 }
 
 pub(crate) fn prepare_covfun_record<'tcx>(
     tcx: TyCtxt<'tcx>,
-    global_file_table: &mut GlobalFileTable,
     instance: Instance<'tcx>,
     is_used: bool,
 ) -> Option<CovfunRecord<'tcx>> {
@@ -65,7 +65,7 @@ pub(crate) fn prepare_covfun_record<'tcx>(
         regions: ffi::Regions::default(),
     };
 
-    fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun);
+    fill_region_tables(tcx, fn_cov_info, ids_info, &mut covfun);
 
     if covfun.regions.has_no_regions() {
         debug!(?covfun, "function has no mappings to embed; skipping");
@@ -100,28 +100,37 @@ fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec<ffi::CounterExpression
 /// Populates the mapping region tables in the current function's covfun record.
 fn fill_region_tables<'tcx>(
     tcx: TyCtxt<'tcx>,
-    global_file_table: &mut GlobalFileTable,
     fn_cov_info: &'tcx FunctionCoverageInfo,
     ids_info: &'tcx CoverageIdsInfo,
     covfun: &mut CovfunRecord<'tcx>,
 ) {
-    // Currently a function's mappings must all be in the same file as its body span.
+    // Currently a function's mappings must all be in the same file, so use the
+    // first mapping's span to determine the file.
     let source_map = tcx.sess.source_map();
-    let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo());
-
-    // Look up the global file ID for that file.
-    let global_file_id = global_file_table.global_file_id_for_file(&source_file);
-
-    // Associate that global file ID with a local file ID for this function.
-    let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
+    let Some(first_span) = (try { fn_cov_info.mappings.first()?.span }) else {
+        debug_assert!(false, "function has no mappings: {:?}", covfun.mangled_function_name);
+        return;
+    };
+    let source_file = source_map.lookup_source_file(first_span.lo());
 
-    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-        &mut covfun.regions;
+    let local_file_id = covfun.virtual_file_mapping.push_file(&source_file);
 
-    let make_cov_span = |span: Span| {
-        spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span)
-    };
+    // In rare cases, _all_ of a function's spans are discarded, and coverage
+    // codegen needs to handle that gracefully to avoid #133606.
+    // It's hard for tests to trigger this organically, so instead we set
+    // `-Zcoverage-options=discard-all-spans-in-codegen` to force it to occur.
     let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen();
+    let make_coords = |span: Span| {
+        if discard_all { None } else { spans::make_coords(source_map, &source_file, span) }
+    };
+
+    let ffi::Regions {
+        code_regions,
+        expansion_regions: _, // FIXME(Zalathar): Fill out support for expansion regions
+        branch_regions,
+        mcdc_branch_regions,
+        mcdc_decision_regions,
+    } = &mut covfun.regions;
 
     // For each counter/region pair in this function+file, convert it to a
     // form suitable for FFI.
@@ -136,17 +145,8 @@ fn fill_region_tables<'tcx>(
             ffi::Counter::from_term(term)
         };
 
-        // Convert the `Span` into coordinates that we can pass to LLVM, or
-        // discard the span if conversion fails. In rare, cases _all_ of a
-        // function's spans are discarded, and the rest of coverage codegen
-        // needs to handle that gracefully to avoid a repeat of #133606.
-        // We don't have a good test case for triggering that organically, so
-        // instead we set `-Zcoverage-options=discard-all-spans-in-codegen`
-        // to force it to occur.
-        let Some(cov_span) = make_cov_span(span) else { continue };
-        if discard_all {
-            continue;
-        }
+        let Some(coords) = make_coords(span) else { continue };
+        let cov_span = coords.make_coverage_span(local_file_id);
 
         match *kind {
             MappingKind::Code { bcb } => {
@@ -182,7 +182,7 @@ fn fill_region_tables<'tcx>(
 /// as a global variable in the `__llvm_covfun` section.
 pub(crate) fn generate_covfun_record<'tcx>(
     cx: &CodegenCx<'_, 'tcx>,
-    filenames_hash: u64,
+    global_file_table: &GlobalFileTable,
     covfun: &CovfunRecord<'tcx>,
 ) {
     let &CovfunRecord {
@@ -194,12 +194,19 @@ pub(crate) fn generate_covfun_record<'tcx>(
         ref regions,
     } = covfun;
 
+    let Some(local_file_table) = virtual_file_mapping.resolve_all(global_file_table) else {
+        debug_assert!(
+            false,
+            "all local files should be present in the global file table: \
+                global_file_table = {global_file_table:?}, \
+                virtual_file_mapping = {virtual_file_mapping:?}"
+        );
+        return;
+    };
+
     // Encode the function's coverage mappings into a buffer.
-    let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer(
-        &virtual_file_mapping.to_vec(),
-        expressions,
-        regions,
-    );
+    let coverage_mapping_buffer =
+        llvm_cov::write_function_mappings_to_buffer(&local_file_table, expressions, regions);
 
     // A covfun record consists of four target-endian integers, followed by the
     // encoded mapping data in bytes. Note that the length field is 32 bits.
@@ -212,7 +219,7 @@ pub(crate) fn generate_covfun_record<'tcx>(
             cx.const_u64(func_name_hash),
             cx.const_u32(coverage_mapping_buffer.len() as u32),
             cx.const_u64(source_hash),
-            cx.const_u64(filenames_hash),
+            cx.const_u64(global_file_table.filenames_hash),
             cx.const_bytes(&coverage_mapping_buffer),
         ],
         // This struct needs to be packed, so that the 32-bit length field
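For reference, the four target-endian integers mentioned in the comment above, in the order they are emitted into the packed covfun struct. This is only an illustrative layout sketch, not a type used by the compiler:

#[repr(C, packed)]
struct CovfunRecordHeader {
    func_name_hash: u64, // hash of the mangled function name
    mapping_len: u32,    // length in bytes of the encoded mapping data (32-bit field)
    source_hash: u64,    // hash of the function's source
    filenames_hash: u64, // truncated hash of this CGU's encoded filenames table
}
// The encoded coverage mapping bytes follow immediately after these fields.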
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
index 6d1d91340c2..39a59560c9d 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
@@ -1,4 +1,3 @@
-use rustc_middle::mir::coverage::FunctionCoverageInfo;
 use rustc_span::source_map::SourceMap;
 use rustc_span::{BytePos, Pos, SourceFile, Span};
 use tracing::debug;
@@ -6,24 +5,41 @@ use tracing::debug;
 use crate::coverageinfo::ffi;
 use crate::coverageinfo::mapgen::LocalFileId;
 
+/// Line and byte-column coordinates of a source code span within some file.
+/// The file itself must be tracked separately.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Coords {
+    /// 1-based starting line of the source code span.
+    pub(crate) start_line: u32,
+    /// 1-based starting column (in bytes) of the source code span.
+    pub(crate) start_col: u32,
+    /// 1-based ending line of the source code span.
+    pub(crate) end_line: u32,
+    /// 1-based ending column (in bytes) of the source code span. High bit must be unset.
+    pub(crate) end_col: u32,
+}
+
+impl Coords {
+    /// Attaches a local file ID to these coordinates to produce an `ffi::CoverageSpan`.
+    pub(crate) fn make_coverage_span(&self, local_file_id: LocalFileId) -> ffi::CoverageSpan {
+        let &Self { start_line, start_col, end_line, end_col } = self;
+        let file_id = local_file_id.as_u32();
+        ffi::CoverageSpan { file_id, start_line, start_col, end_line, end_col }
+    }
+}
+
 /// Converts the span into its start line and column, and end line and column.
 ///
 /// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
 /// the compiler, these column numbers are denoted in **bytes**, because that's what
 /// LLVM's `llvm-cov` tool expects to see in coverage maps.
 ///
-/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
+/// Returns `None` if the conversion failed for some reason. This should be uncommon,
 /// but it's hard to rule out entirely (especially in the presence of complex macros
 /// or other expansions), and if it does happen then skipping a span or function is
 /// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
-pub(crate) fn make_coverage_span(
-    file_id: LocalFileId,
-    source_map: &SourceMap,
-    fn_cov_info: &FunctionCoverageInfo,
-    file: &SourceFile,
-    span: Span,
-) -> Option<ffi::CoverageSpan> {
-    let span = ensure_non_empty_span(source_map, fn_cov_info, span)?;
+pub(crate) fn make_coords(source_map: &SourceMap, file: &SourceFile, span: Span) -> Option<Coords> {
+    let span = ensure_non_empty_span(source_map, span)?;
 
     let lo = span.lo();
     let hi = span.hi();
@@ -46,8 +62,7 @@ pub(crate) fn make_coverage_span(
     start_line = source_map.doctest_offset_line(&file.name, start_line);
     end_line = source_map.doctest_offset_line(&file.name, end_line);
 
-    check_coverage_span(ffi::CoverageSpan {
-        file_id: file_id.as_u32(),
+    check_coords(Coords {
         start_line: start_line as u32,
         start_col: start_col as u32,
         end_line: end_line as u32,
@@ -55,36 +70,22 @@ pub(crate) fn make_coverage_span(
     })
 }
 
-fn ensure_non_empty_span(
-    source_map: &SourceMap,
-    fn_cov_info: &FunctionCoverageInfo,
-    span: Span,
-) -> Option<Span> {
+fn ensure_non_empty_span(source_map: &SourceMap, span: Span) -> Option<Span> {
     if !span.is_empty() {
         return Some(span);
     }
 
-    let lo = span.lo();
-    let hi = span.hi();
-
-    // The span is empty, so try to expand it to cover an adjacent '{' or '}',
-    // but only within the bounds of the body span.
-    let try_next = hi < fn_cov_info.body_span.hi();
-    let try_prev = fn_cov_info.body_span.lo() < lo;
-    if !(try_next || try_prev) {
-        return None;
-    }
-
+    // The span is empty, so try to enlarge it to cover an adjacent '{' or '}'.
     source_map
         .span_to_source(span, |src, start, end| try {
             // Adjusting span endpoints by `BytePos(1)` is normally a bug,
             // but in this case we have specifically checked that the character
             // we're skipping over is one of two specific ASCII characters, so
             // adjusting by exactly 1 byte is correct.
-            if try_next && src.as_bytes()[end] == b'{' {
-                Some(span.with_hi(hi + BytePos(1)))
-            } else if try_prev && src.as_bytes()[start - 1] == b'}' {
-                Some(span.with_lo(lo - BytePos(1)))
+            if src.as_bytes().get(end).copied() == Some(b'{') {
+                Some(span.with_hi(span.hi() + BytePos(1)))
+            } else if start > 0 && src.as_bytes()[start - 1] == b'}' {
+                Some(span.with_lo(span.lo() - BytePos(1)))
             } else {
                 None
             }
@@ -96,8 +97,8 @@ fn ensure_non_empty_span(
 /// it will immediately exit with a fatal error. To prevent that from happening,
 /// discard regions that are improperly ordered, or might be interpreted in a
 /// way that makes them improperly ordered.
-fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> {
-    let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span;
+fn check_coords(coords: Coords) -> Option<Coords> {
+    let Coords { start_line, start_col, end_line, end_col } = coords;
 
     // Line/column coordinates are supposed to be 1-based. If we ever emit
     // coordinates of 0, `llvm-cov` might misinterpret them.
@@ -110,17 +111,17 @@ fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan>
     let is_ordered = (start_line, start_col) <= (end_line, end_col);
 
     if all_nonzero && end_col_has_high_bit_unset && is_ordered {
-        Some(cov_span)
+        Some(coords)
     } else {
         debug!(
-            ?cov_span,
+            ?coords,
             ?all_nonzero,
             ?end_col_has_high_bit_unset,
             ?is_ordered,
             "Skipping source region that would be misinterpreted or rejected by LLVM"
         );
         // If this happens in a debug build, ICE to make it easier to notice.
-        debug_assert!(false, "Improper source region: {cov_span:?}");
+        debug_assert!(false, "Improper source region: {coords:?}");
         None
     }
 }
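As the doc comment on `make_coords` notes, `start_col`/`end_col` are byte columns rather than character columns, which matters as soon as a line contains multi-byte UTF-8. A small worked example of the difference:

fn main() {
    let line = "é = 1;"; // 'é' is two bytes in UTF-8
    // '=' is the 3rd character on the line, but its 1-based *byte* column is 4.
    let byte_col = line.find('=').unwrap() + 1;
    let char_col = line.chars().position(|c| c == '=').unwrap() + 1;
    assert_eq!((char_col, byte_col), (3, 4));
}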
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
new file mode 100644
index 00000000000..68f60f169b5
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
@@ -0,0 +1,170 @@
+use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, ConstCodegenMethods};
+use rustc_data_structures::fx::FxHashSet;
+use rustc_hir::def_id::{DefId, LocalDefId};
+use rustc_middle::mir;
+use rustc_middle::mir::mono::MonoItemPartitions;
+use rustc_middle::ty::{self, TyCtxt};
+use rustc_span::def_id::DefIdSet;
+
+use crate::common::CodegenCx;
+use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record};
+use crate::llvm;
+
+/// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
+/// But since we don't want unused functions to disappear from coverage reports, we also scan for
+/// functions that were instrumented but are not participating in codegen.
+///
+/// These unused functions don't need to be codegenned, but we do need to add them to the function
+/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
+/// We also end up adding their symbol names to a special global array that LLVM will include in
+/// its embedded coverage data.
+pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
+    cx: &CodegenCx<'_, 'tcx>,
+    covfun_records: &mut Vec<CovfunRecord<'tcx>>,
+) {
+    assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
+
+    let mut unused_instances = gather_unused_function_instances(cx);
+    // Sort the unused instances by symbol name, so that their order isn't hash-sensitive.
+    unused_instances.sort_by_key(|instance| instance.symbol_name);
+
+    // Try to create a covfun record for each unused function.
+    let mut name_globals = Vec::with_capacity(unused_instances.len());
+    covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try {
+        let record = prepare_covfun_record(cx.tcx, unused.instance, false)?;
+        // If successful, also store its symbol name in a global constant.
+        name_globals.push(cx.const_str(unused.symbol_name.name).0);
+        record
+    }));
+
+    // Store the names of unused functions in a specially-named global array.
+    // LLVM's `InstrProfiling` pass will detect this array, and include the
+    // referenced names in its `__llvm_prf_names` section.
+    // (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.)
+    if !name_globals.is_empty() {
+        let initializer = cx.const_array(cx.type_ptr(), &name_globals);
+
+        let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names");
+        llvm::set_global_constant(array, true);
+        llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
+        llvm::set_initializer(array, initializer);
+    }
+}
+
+/// Holds a dummy function instance along with its symbol name, to avoid having
+/// to repeatedly query for the name.
+struct UnusedInstance<'tcx> {
+    instance: ty::Instance<'tcx>,
+    symbol_name: ty::SymbolName<'tcx>,
+}
+
+fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<UnusedInstance<'tcx>> {
+    assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
+
+    let tcx = cx.tcx;
+    let usage = prepare_usage_sets(tcx);
+
+    let is_unused_fn = |def_id: LocalDefId| -> bool {
+        // Usage sets expect `DefId`, so convert from `LocalDefId`.
+        let d: DefId = LocalDefId::to_def_id(def_id);
+        // To be potentially eligible for "unused function" mappings, a definition must:
+        // - Be eligible for coverage instrumentation
+        // - Not participate directly in codegen (or have lost all its coverage statements)
+        // - Not have any coverage statements inlined into codegenned functions
+        tcx.is_eligible_for_coverage(def_id)
+            && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
+            && !usage.used_via_inlining.contains(&d)
+    };
+
+    // FIXME(#79651): Consider trying to filter out dummy instantiations of
+    // unused generic functions from library crates, because they can produce
+    // "unused instantiation" in coverage reports even when they are actually
+    // used by some downstream crate in the same binary.
+
+    tcx.mir_keys(())
+        .iter()
+        .copied()
+        .filter(|&def_id| is_unused_fn(def_id))
+        .map(|def_id| make_dummy_instance(tcx, def_id))
+        .map(|instance| UnusedInstance { instance, symbol_name: tcx.symbol_name(instance) })
+        .collect::<Vec<_>>()
+}
+
+struct UsageSets<'tcx> {
+    all_mono_items: &'tcx DefIdSet,
+    used_via_inlining: FxHashSet<DefId>,
+    missing_own_coverage: FxHashSet<DefId>,
+}
+
+/// Prepare sets of definitions that are relevant to deciding whether something
+/// is an "unused function" for coverage purposes.
+fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
+    let MonoItemPartitions { all_mono_items, codegen_units, .. } =
+        tcx.collect_and_partition_mono_items(());
+
+    // Obtain a MIR body for each function participating in codegen, via an
+    // arbitrary instance.
+    let mut def_ids_seen = FxHashSet::default();
+    let def_and_mir_for_all_mono_fns = codegen_units
+        .iter()
+        .flat_map(|cgu| cgu.items().keys())
+        .filter_map(|item| match item {
+            mir::mono::MonoItem::Fn(instance) => Some(instance),
+            mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None,
+        })
+        // We only need one arbitrary instance per definition.
+        .filter(move |instance| def_ids_seen.insert(instance.def_id()))
+        .map(|instance| {
+            // We don't care about the instance, just its underlying MIR.
+            let body = tcx.instance_mir(instance.def);
+            (instance.def_id(), body)
+        });
+
+    // Functions whose coverage statements were found inlined into other functions.
+    let mut used_via_inlining = FxHashSet::default();
+    // Functions that were instrumented, but had all of their coverage statements
+    // removed by later MIR transforms (e.g. UnreachablePropagation).
+    let mut missing_own_coverage = FxHashSet::default();
+
+    for (def_id, body) in def_and_mir_for_all_mono_fns {
+        let mut saw_own_coverage = false;
+
+        // Inspect every coverage statement in the function's MIR.
+        for stmt in body
+            .basic_blocks
+            .iter()
+            .flat_map(|block| &block.statements)
+            .filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_)))
+        {
+            if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) {
+                // This coverage statement was inlined from another function.
+                used_via_inlining.insert(inlined.def_id());
+            } else {
+                // Non-inlined coverage statements belong to the enclosing function.
+                saw_own_coverage = true;
+            }
+        }
+
+        if !saw_own_coverage && body.function_coverage_info.is_some() {
+            missing_own_coverage.insert(def_id);
+        }
+    }
+
+    UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
+}
+
+fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
+    let def_id = local_def_id.to_def_id();
+
+    // Make a dummy instance that fills in all generics with placeholders.
+    ty::Instance::new(
+        def_id,
+        ty::GenericArgs::for_item(tcx, def_id, |param, _| {
+            if let ty::GenericParamDefKind::Lifetime = param.kind {
+                tcx.lifetimes.re_erased.into()
+            } else {
+                tcx.mk_param_from_def(param)
+            }
+        }),
+    )
+}
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
index f52991b3697..d2591139d6e 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
@@ -3,7 +3,6 @@ use std::collections::hash_map::Entry;
 use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext};
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::fx::FxHashMap;
-use rustc_index::Idx;
 use rustc_index::bit_set::DenseBitSet;
 use rustc_middle::mir::{Body, SourceScope};
 use rustc_middle::ty::layout::{FnAbiOf, HasTypingEnv};
@@ -43,8 +42,7 @@ pub(crate) fn compute_mir_scopes<'ll, 'tcx>(
     let mut instantiated = DenseBitSet::new_empty(mir.source_scopes.len());
     let mut discriminators = FxHashMap::default();
     // Instantiate all scopes.
-    for idx in 0..mir.source_scopes.len() {
-        let scope = SourceScope::new(idx);
+    for scope in mir.source_scopes.indices() {
         make_mir_scope(
             cx,
             instance,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index 98d59f5a8ae..7f3e486ca31 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -2,6 +2,7 @@ use std::borrow::Cow;
 use std::fmt::{self, Write};
 use std::hash::{Hash, Hasher};
 use std::path::{Path, PathBuf};
+use std::sync::Arc;
 use std::{iter, ptr};
 
 use libc::{c_char, c_longlong, c_uint};
@@ -38,8 +39,8 @@ use crate::debuginfo::metadata::type_map::build_type_with_children;
 use crate::debuginfo::utils::{WidePtrKind, wide_pointer_kind};
 use crate::llvm;
 use crate::llvm::debuginfo::{
-    DIDescriptor, DIFile, DIFlags, DILexicalBlock, DIScope, DIType, DebugEmissionKind,
-    DebugNameTableKind,
+    DIBasicType, DIBuilder, DICompositeType, DIDescriptor, DIFile, DIFlags, DILexicalBlock,
+    DIScope, DIType, DebugEmissionKind, DebugNameTableKind,
 };
 use crate::value::Value;
 
@@ -68,7 +69,8 @@ pub(super) const UNKNOWN_COLUMN_NUMBER: c_uint = 0;
 
 const NO_SCOPE_METADATA: Option<&DIScope> = None;
 /// A function that returns an empty list of generic parameter debuginfo nodes.
-const NO_GENERICS: for<'ll> fn(&CodegenCx<'ll, '_>) -> SmallVec<&'ll DIType> = |_| SmallVec::new();
+const NO_GENERICS: for<'ll> fn(&CodegenCx<'ll, '_>) -> SmallVec<Option<&'ll DIType>> =
+    |_| SmallVec::new();
 
 // SmallVec is used quite a bit in this module, so create a shorthand.
 // The actual number of elements is not so important.
@@ -243,7 +245,7 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                             cx,
                             owner,
                             addr_field_name,
-                            (addr_field.size, addr_field.align.abi),
+                            addr_field,
                             layout.fields.offset(WIDE_PTR_ADDR),
                             DIFlags::FlagZero,
                             data_ptr_type_di_node,
@@ -253,7 +255,7 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                             cx,
                             owner,
                             extra_field_name,
-                            (extra_field.size, extra_field.align.abi),
+                            extra_field,
                             layout.fields.offset(WIDE_PTR_EXTRA),
                             DIFlags::FlagZero,
                             type_di_node(cx, extra_field.ty),
@@ -311,12 +313,7 @@ fn build_subroutine_type_di_node<'ll, 'tcx>(
 
     debug_context(cx).type_map.unique_id_to_di_node.borrow_mut().remove(&unique_type_id);
 
-    let fn_di_node = unsafe {
-        llvm::LLVMRustDIBuilderCreateSubroutineType(
-            DIB(cx),
-            create_DIArray(DIB(cx), &signature_di_nodes[..]),
-        )
-    };
+    let fn_di_node = create_subroutine_type(cx, create_DIArray(DIB(cx), &signature_di_nodes[..]));
 
     // This is actually a function pointer, so wrap it in pointer DI.
     let name = compute_debuginfo_type_name(cx.tcx, fn_ty, false);
@@ -340,6 +337,13 @@ fn build_subroutine_type_di_node<'ll, 'tcx>(
     DINodeCreationResult::new(di_node, false)
 }
 
+pub(super) fn create_subroutine_type<'ll>(
+    cx: &CodegenCx<'ll, '_>,
+    signature: &'ll DICompositeType,
+) -> &'ll DICompositeType {
+    unsafe { llvm::LLVMRustDIBuilderCreateSubroutineType(DIB(cx), signature) }
+}
+
 /// Create debuginfo for `dyn SomeTrait` types. Currently these are empty structs
 /// with the correct type name (e.g. "dyn SomeTrait<Foo, Item=u32> + Sync").
 fn build_dyn_type_di_node<'ll, 'tcx>(
@@ -487,26 +491,22 @@ pub(crate) fn type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, t: Ty<'tcx>) ->
 // FIXME(mw): Cache this via a regular UniqueTypeId instead of an extra field in the debug context.
 fn recursion_marker_type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> &'ll DIType {
     *debug_context(cx).recursion_marker_type.get_or_init(move || {
-        unsafe {
-            // The choice of type here is pretty arbitrary -
-            // anything reading the debuginfo for a recursive
-            // type is going to see *something* weird - the only
-            // question is what exactly it will see.
-            //
-            // FIXME: the name `<recur_type>` does not fit the naming scheme
-            //        of other types.
-            //
-            // FIXME: it might make sense to use an actual pointer type here
-            //        so that debuggers can show the address.
-            let name = "<recur_type>";
-            llvm::LLVMRustDIBuilderCreateBasicType(
-                DIB(cx),
-                name.as_c_char_ptr(),
-                name.len(),
-                cx.tcx.data_layout.pointer_size.bits(),
-                dwarf_const::DW_ATE_unsigned,
-            )
-        }
+        // The choice of type here is pretty arbitrary -
+        // anything reading the debuginfo for a recursive
+        // type is going to see *something* weird - the only
+        // question is what exactly it will see.
+        //
+        // FIXME: the name `<recur_type>` does not fit the naming scheme
+        //        of other types.
+        //
+        // FIXME: it might make sense to use an actual pointer type here
+        //        so that debuggers can show the address.
+        create_basic_type(
+            cx,
+            "<recur_type>",
+            cx.tcx.data_layout.pointer_size,
+            dwarf_const::DW_ATE_unsigned,
+        )
     })
 }
 
@@ -620,42 +620,38 @@ pub(crate) fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFi
         let source =
             cx.sess().opts.unstable_opts.embed_source.then_some(()).and(source_file.src.as_ref());
 
-        unsafe {
-            llvm::LLVMRustDIBuilderCreateFile(
-                DIB(cx),
-                file_name.as_c_char_ptr(),
-                file_name.len(),
-                directory.as_c_char_ptr(),
-                directory.len(),
-                hash_kind,
-                hash_value.as_c_char_ptr(),
-                hash_value.len(),
-                source.map_or(ptr::null(), |x| x.as_c_char_ptr()),
-                source.map_or(0, |x| x.len()),
-            )
-        }
+        create_file(DIB(cx), &file_name, &directory, &hash_value, hash_kind, source)
     }
 }
 
 fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
-    debug_context(cx).created_files.borrow_mut().entry(None).or_insert_with(|| unsafe {
-        let file_name = "<unknown>";
-        let directory = "";
-        let hash_value = "";
+    debug_context(cx).created_files.borrow_mut().entry(None).or_insert_with(|| {
+        create_file(DIB(cx), "<unknown>", "", "", llvm::ChecksumKind::None, None)
+    })
+}
 
+fn create_file<'ll>(
+    builder: &DIBuilder<'ll>,
+    file_name: &str,
+    directory: &str,
+    hash_value: &str,
+    hash_kind: llvm::ChecksumKind,
+    source: Option<&Arc<String>>,
+) -> &'ll DIFile {
+    unsafe {
         llvm::LLVMRustDIBuilderCreateFile(
-            DIB(cx),
+            builder,
             file_name.as_c_char_ptr(),
             file_name.len(),
             directory.as_c_char_ptr(),
             directory.len(),
-            llvm::ChecksumKind::None,
+            hash_kind,
             hash_value.as_c_char_ptr(),
             hash_value.len(),
-            ptr::null(),
-            0,
+            source.map_or(ptr::null(), |x| x.as_c_char_ptr()),
+            source.map_or(0, |x| x.len()),
         )
-    })
+    }
 }
 
 trait MsvcBasicName {
@@ -742,7 +738,7 @@ fn build_cpp_f16_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> DINodeCreation
                 cx,
                 float_di_node,
                 "bits",
-                cx.size_and_align_of(bits_ty),
+                cx.layout_of(bits_ty),
                 Size::ZERO,
                 DIFlags::FlagZero,
                 type_di_node(cx, bits_ty),
@@ -788,15 +784,7 @@ fn build_basic_type_di_node<'ll, 'tcx>(
         _ => bug!("debuginfo::build_basic_type_di_node - `t` is invalid type"),
     };
 
-    let ty_di_node = unsafe {
-        llvm::LLVMRustDIBuilderCreateBasicType(
-            DIB(cx),
-            name.as_c_char_ptr(),
-            name.len(),
-            cx.size_of(t).bits(),
-            encoding,
-        )
-    };
+    let ty_di_node = create_basic_type(cx, name, cx.size_of(t), encoding);
 
     if !cpp_like_debuginfo {
         return DINodeCreationResult::new(ty_di_node, false);
@@ -824,6 +812,23 @@ fn build_basic_type_di_node<'ll, 'tcx>(
     DINodeCreationResult::new(typedef_di_node, false)
 }
 
+fn create_basic_type<'ll, 'tcx>(
+    cx: &CodegenCx<'ll, 'tcx>,
+    name: &str,
+    size: Size,
+    encoding: u32,
+) -> &'ll DIBasicType {
+    unsafe {
+        llvm::LLVMRustDIBuilderCreateBasicType(
+            DIB(cx),
+            name.as_c_char_ptr(),
+            name.len(),
+            size.bits(),
+            encoding,
+        )
+    }
+}
+
 fn build_foreign_type_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     t: Ty<'tcx>,
@@ -905,7 +910,8 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
         && let Some(f) = output_filenames.split_dwarf_path(
             tcx.sess.split_debuginfo(),
             tcx.sess.opts.unstable_opts.split_dwarf_kind,
-            Some(codegen_unit_name),
+            codegen_unit_name,
+            tcx.sess.invocation_temp.as_deref(),
         ) {
         // We get a path relative to the working directory from split_dwarf_path
         Some(tcx.sess.source_map().path_mapping().to_real_filename(f))
@@ -929,17 +935,13 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
     };
 
     unsafe {
-        let compile_unit_file = llvm::LLVMRustDIBuilderCreateFile(
+        let compile_unit_file = create_file(
             debug_context.builder.as_ref(),
-            name_in_debuginfo.as_c_char_ptr(),
-            name_in_debuginfo.len(),
-            work_dir.as_c_char_ptr(),
-            work_dir.len(),
+            &name_in_debuginfo,
+            &work_dir,
+            "",
             llvm::ChecksumKind::None,
-            ptr::null(),
-            0,
-            ptr::null(),
-            0,
+            None,
         );
 
         let unit_metadata = llvm::LLVMRustDIBuilderCreateCompileUnit(
@@ -971,7 +973,7 @@ fn build_field_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     owner: &'ll DIScope,
     name: &str,
-    size_and_align: (Size, Align),
+    layout: TyAndLayout<'tcx>,
     offset: Size,
     flags: DIFlags,
     type_di_node: &'ll DIType,
@@ -983,6 +985,30 @@ fn build_field_di_node<'ll, 'tcx>(
     } else {
         (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER)
     };
+    create_member_type(
+        cx,
+        owner,
+        name,
+        file_metadata,
+        line_number,
+        layout,
+        offset,
+        flags,
+        type_di_node,
+    )
+}
+
+fn create_member_type<'ll, 'tcx>(
+    cx: &CodegenCx<'ll, 'tcx>,
+    owner: &'ll DIScope,
+    name: &str,
+    file_metadata: &'ll DIType,
+    line_number: u32,
+    layout: TyAndLayout<'tcx>,
+    offset: Size,
+    flags: DIFlags,
+    type_di_node: &'ll DIType,
+) -> &'ll DIType {
     unsafe {
         llvm::LLVMRustDIBuilderCreateMemberType(
             DIB(cx),
@@ -991,8 +1017,8 @@ fn build_field_di_node<'ll, 'tcx>(
             name.len(),
             file_metadata,
             line_number,
-            size_and_align.0.bits(),
-            size_and_align.1.bits() as u32,
+            layout.size.bits(),
+            layout.align.abi.bits() as u32,
             offset.bits(),
             flags,
             type_di_node,
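A hedged summary of the refactor above, in our words rather than the patch author's (argument lists abbreviated):

// build_field_di_node(cx, owner, name, layout, offset, flags, type_di_node)
//     resolves (file_metadata, line_number) itself, then defers to create_member_type
// create_member_type(cx, owner, name, file_metadata, line_number, layout, offset, flags, type_di_node)
//     thin wrapper over LLVMRustDIBuilderCreateMemberType, for callers that need to
//     pass an explicit source location (see the cpp_like enum changes later in this diff)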
@@ -1076,7 +1102,7 @@ fn build_struct_type_di_node<'ll, 'tcx>(
                         cx,
                         owner,
                         &field_name[..],
-                        (field_layout.size, field_layout.align.abi),
+                        field_layout,
                         struct_type_and_layout.fields.offset(i),
                         visibility_di_flags(cx, f.did, adt_def.did()),
                         type_di_node(cx, field_layout.ty),
@@ -1126,7 +1152,7 @@ fn build_upvar_field_di_nodes<'ll, 'tcx>(
                 cx,
                 closure_or_coroutine_di_node,
                 capture_name.as_str(),
-                cx.size_and_align_of(up_var_ty),
+                cx.layout_of(up_var_ty),
                 layout.fields.offset(index),
                 DIFlags::FlagZero,
                 type_di_node(cx, up_var_ty),
@@ -1171,7 +1197,7 @@ fn build_tuple_type_di_node<'ll, 'tcx>(
                         cx,
                         tuple_di_node,
                         &tuple_field_name(index),
-                        cx.size_and_align_of(component_type),
+                        cx.layout_of(component_type),
                         tuple_type_and_layout.fields.offset(index),
                         DIFlags::FlagZero,
                         type_di_node(cx, component_type),
@@ -1269,7 +1295,7 @@ fn build_union_type_di_node<'ll, 'tcx>(
                         cx,
                         owner,
                         f.name.as_str(),
-                        size_and_align_of(field_layout),
+                        field_layout,
                         Size::ZERO,
                         DIFlags::FlagZero,
                         type_di_node(cx, field_layout.ty),
@@ -1287,7 +1313,7 @@ fn build_union_type_di_node<'ll, 'tcx>(
 fn build_generic_type_param_di_nodes<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     ty: Ty<'tcx>,
-) -> SmallVec<&'ll DIType> {
+) -> SmallVec<Option<&'ll DIType>> {
     if let ty::Adt(def, args) = *ty.kind() {
         if args.types().next().is_some() {
             let generics = cx.tcx.generics_of(def.did());
@@ -1297,16 +1323,7 @@ fn build_generic_type_param_di_nodes<'ll, 'tcx>(
                     kind.as_type().map(|ty| {
                         let actual_type = cx.tcx.normalize_erasing_regions(cx.typing_env(), ty);
                         let actual_type_di_node = type_di_node(cx, actual_type);
-                        let name = name.as_str();
-                        unsafe {
-                            llvm::LLVMRustDIBuilderCreateTemplateTypeParameter(
-                                DIB(cx),
-                                None,
-                                name.as_c_char_ptr(),
-                                name.len(),
-                                actual_type_di_node,
-                            )
-                        }
+                        Some(cx.create_template_type_parameter(name.as_str(), actual_type_di_node))
                     })
                 })
                 .collect();
@@ -1416,7 +1433,9 @@ fn build_vtable_type_di_node<'ll, 'tcx>(
     let void_pointer_ty = Ty::new_imm_ptr(tcx, tcx.types.unit);
     let void_pointer_type_di_node = type_di_node(cx, void_pointer_ty);
     let usize_di_node = type_di_node(cx, tcx.types.usize);
-    let (pointer_size, pointer_align) = cx.size_and_align_of(void_pointer_ty);
+    let pointer_layout = cx.layout_of(void_pointer_ty);
+    let pointer_size = pointer_layout.size;
+    let pointer_align = pointer_layout.align.abi;
     // If `usize` is not pointer-sized and -aligned then the size and alignment computations
     // for the vtable as a whole would be wrong. Let's make sure this holds even on weird
     // platforms.
@@ -1472,7 +1491,7 @@ fn build_vtable_type_di_node<'ll, 'tcx>(
                         cx,
                         vtable_type_di_node,
                         &field_name,
-                        (pointer_size, pointer_align),
+                        pointer_layout,
                         field_offset,
                         DIFlags::FlagZero,
                         field_type_di_node,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
index a72e205c9b2..07075be55fa 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
@@ -17,8 +17,8 @@ use crate::debuginfo::metadata::enums::DiscrResult;
 use crate::debuginfo::metadata::type_map::{self, Stub, UniqueTypeId};
 use crate::debuginfo::metadata::{
     DINodeCreationResult, NO_GENERICS, NO_SCOPE_METADATA, SmallVec, UNKNOWN_LINE_NUMBER,
-    build_field_di_node, file_metadata, file_metadata_from_def_id, size_and_align_of, type_di_node,
-    unknown_file_metadata, visibility_di_flags,
+    build_field_di_node, create_member_type, file_metadata, file_metadata_from_def_id,
+    size_and_align_of, type_di_node, unknown_file_metadata, visibility_di_flags,
 };
 use crate::debuginfo::utils::DIB;
 use crate::llvm::debuginfo::{DIFile, DIFlags, DIType};
@@ -370,9 +370,9 @@ fn build_single_variant_union_fields<'ll, 'tcx>(
             cx,
             enum_type_di_node,
             &variant_union_field_name(variant_index),
-            // NOTE: We use the size and align of the entire type, not from variant_layout
+            // NOTE: We use the layout of the entire type, not from variant_layout
             //       since the latter is sometimes smaller (if it has fewer fields).
-            size_and_align_of(enum_type_and_layout),
+            enum_type_and_layout,
             Size::ZERO,
             visibility_flags,
             variant_struct_type_wrapper_di_node,
@@ -560,7 +560,7 @@ fn build_variant_struct_wrapper_type_di_node<'ll, 'tcx>(
                 cx,
                 wrapper_struct_type_di_node,
                 "value",
-                size_and_align_of(enum_or_coroutine_type_and_layout),
+                enum_or_coroutine_type_and_layout,
                 Size::ZERO,
                 DIFlags::FlagZero,
                 variant_struct_type_di_node,
@@ -820,7 +820,6 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             .unwrap_or_else(|| (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER));
 
         let field_name = variant_union_field_name(variant_member_info.variant_index);
-        let (size, align) = size_and_align_of(enum_type_and_layout);
 
         let variant_struct_type_wrapper = build_variant_struct_wrapper_type_di_node(
             cx,
@@ -840,27 +839,23 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             },
         );
 
-        // We use LLVMRustDIBuilderCreateMemberType() member type directly because
+        // We use create_member_type() directly because
         // the build_field_di_node() function does not support specifying a source location,
         // which is something that we don't do anywhere else.
-        unsafe {
-            llvm::LLVMRustDIBuilderCreateMemberType(
-                DIB(cx),
-                enum_type_di_node,
-                field_name.as_c_char_ptr(),
-                field_name.len(),
-                file_di_node,
-                line_number,
-                // NOTE: We use the size and align of the entire type, not from variant_layout
-                //       since the later is sometimes smaller (if it has fewer fields).
-                size.bits(),
-                align.bits() as u32,
-                // Union fields are always at offset zero
-                Size::ZERO.bits(),
-                di_flags,
-                variant_struct_type_wrapper,
-            )
-        }
+        create_member_type(
+            cx,
+            enum_type_di_node,
+            &field_name,
+            file_di_node,
+            line_number,
+            // NOTE: We use the layout of the entire type, not from variant_layout
+            //       since the latter is sometimes smaller (if it has fewer fields).
+            enum_type_and_layout,
+            // Union fields are always at offset zero
+            Size::ZERO,
+            di_flags,
+            variant_struct_type_wrapper,
+        )
     }));
 
     assert_eq!(
@@ -874,7 +869,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
 
     if is_128_bits {
         let type_di_node = type_di_node(cx, cx.tcx.types.u64);
-        let size_and_align = cx.size_and_align_of(cx.tcx.types.u64);
+        let u64_layout = cx.layout_of(cx.tcx.types.u64);
 
         let (lo_offset, hi_offset) = match cx.tcx.data_layout.endian {
             Endian::Little => (0, 8),
@@ -889,7 +884,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             cx,
             enum_type_di_node,
             TAG_FIELD_NAME_128_LO,
-            size_and_align,
+            u64_layout,
             lo_offset,
             di_flags,
             type_di_node,
@@ -900,7 +895,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             cx,
             enum_type_di_node,
             TAG_FIELD_NAME_128_HI,
-            size_and_align,
+            u64_layout,
             hi_offset,
             DIFlags::FlagZero,
             type_di_node,
@@ -911,7 +906,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             cx,
             enum_type_di_node,
             TAG_FIELD_NAME,
-            cx.size_and_align_of(enum_type_and_layout.field(cx, tag_field).ty),
+            enum_type_and_layout.field(cx, tag_field),
             enum_type_and_layout.fields.offset(tag_field),
             di_flags,
             tag_base_type_di_node,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
index 9f6a5cc89e0..7c701926d2c 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
@@ -249,7 +249,7 @@ fn build_enum_variant_struct_type_di_node<'ll, 'tcx>(
                         cx,
                         struct_type_di_node,
                         &field_name,
-                        (field_layout.size, field_layout.align.abi),
+                        field_layout,
                         variant_layout.fields.offset(field_index),
                         di_flags,
                         type_di_node(cx, field_layout.ty),
@@ -332,7 +332,7 @@ fn build_coroutine_variant_struct_type_di_node<'ll, 'tcx>(
                         cx,
                         variant_struct_type_di_node,
                         &field_name,
-                        cx.size_and_align_of(field_type),
+                        cx.layout_of(field_type),
                         variant_layout.fields.offset(field_index),
                         DIFlags::FlagZero,
                         type_di_node(cx, field_type),
@@ -352,7 +352,7 @@ fn build_coroutine_variant_struct_type_di_node<'ll, 'tcx>(
                         cx,
                         variant_struct_type_di_node,
                         upvar_name.as_str(),
-                        cx.size_and_align_of(upvar_ty),
+                        cx.layout_of(upvar_ty),
                         coroutine_type_and_layout.fields.offset(index),
                         DIFlags::FlagZero,
                         type_di_node(cx, upvar_ty),
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
index 187d97c54c8..bfd131cfd3d 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
@@ -13,9 +13,9 @@ use smallvec::smallvec;
 use crate::common::{AsCCharPtr, CodegenCx};
 use crate::debuginfo::metadata::type_map::{self, Stub, StubInfo, UniqueTypeId};
 use crate::debuginfo::metadata::{
-    DINodeCreationResult, NO_GENERICS, SmallVec, UNKNOWN_LINE_NUMBER, file_metadata,
-    file_metadata_from_def_id, size_and_align_of, type_di_node, unknown_file_metadata,
-    visibility_di_flags,
+    DINodeCreationResult, NO_GENERICS, SmallVec, UNKNOWN_LINE_NUMBER, create_member_type,
+    file_metadata, file_metadata_from_def_id, size_and_align_of, type_di_node,
+    unknown_file_metadata, visibility_di_flags,
 };
 use crate::debuginfo::utils::{DIB, create_DIArray, get_namespace_for_item};
 use crate::llvm::debuginfo::{DIFile, DIFlags, DIType};
@@ -363,23 +363,22 @@ fn build_discr_member_di_node<'ll, 'tcx>(
 
         &Variants::Multiple { tag_field, .. } => {
             let tag_base_type = tag_base_type(cx.tcx, enum_or_coroutine_type_and_layout);
-            let (size, align) = cx.size_and_align_of(tag_base_type);
-
-            unsafe {
-                Some(llvm::LLVMRustDIBuilderCreateMemberType(
-                    DIB(cx),
-                    containing_scope,
-                    tag_name.as_c_char_ptr(),
-                    tag_name.len(),
-                    unknown_file_metadata(cx),
-                    UNKNOWN_LINE_NUMBER,
-                    size.bits(),
-                    align.bits() as u32,
-                    enum_or_coroutine_type_and_layout.fields.offset(tag_field).bits(),
-                    DIFlags::FlagArtificial,
-                    type_di_node(cx, tag_base_type),
-                ))
-            }
+            let ty = type_di_node(cx, tag_base_type);
+            let file = unknown_file_metadata(cx);
+
+            let layout = cx.layout_of(tag_base_type);
+
+            Some(create_member_type(
+                cx,
+                containing_scope,
+                &tag_name,
+                file,
+                UNKNOWN_LINE_NUMBER,
+                layout,
+                enum_or_coroutine_type_and_layout.fields.offset(tag_field),
+                DIFlags::FlagArtificial,
+                ty,
+            ))
         }
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
index af1d503ad6a..56fb12d3c22 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
@@ -247,6 +247,16 @@ pub(super) fn stub<'ll, 'tcx>(
     StubInfo { metadata, unique_type_id }
 }
 
+struct AdtStackPopGuard<'ll, 'tcx, 'a> {
+    cx: &'a CodegenCx<'ll, 'tcx>,
+}
+
+impl<'ll, 'tcx, 'a> Drop for AdtStackPopGuard<'ll, 'tcx, 'a> {
+    fn drop(&mut self) {
+        debug_context(self.cx).adt_stack.borrow_mut().pop();
+    }
+}
+
 /// This function enables creating debuginfo nodes that can recursively refer to themselves.
 /// It will first insert the given stub into the type map and only then execute the `members`
 /// and `generics` closures passed in. These closures have access to the stub so they can
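For readers less familiar with the idiom, a minimal standalone sketch of the drop-guard pattern used by `AdtStackPopGuard` above; every name below is illustrative and not part of this patch:

use std::cell::RefCell;

struct PopGuard<'a, T> {
    stack: &'a RefCell<Vec<T>>,
}

impl<'a, T> Drop for PopGuard<'a, T> {
    fn drop(&mut self) {
        // Runs on every exit path, including early returns and unwinding,
        // so a pushed entry cannot outlive the scope that pushed it.
        self.stack.borrow_mut().pop();
    }
}

fn with_entry<T>(stack: &RefCell<Vec<T>>, entry: T) {
    stack.borrow_mut().push(entry);
    let _guard = PopGuard { stack };
    // ... work that may return early; the pop still happens when `_guard` is dropped.
}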
@@ -257,16 +267,53 @@ pub(super) fn build_type_with_children<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     stub_info: StubInfo<'ll, 'tcx>,
     members: impl FnOnce(&CodegenCx<'ll, 'tcx>, &'ll DIType) -> SmallVec<&'ll DIType>,
-    generics: impl FnOnce(&CodegenCx<'ll, 'tcx>) -> SmallVec<&'ll DIType>,
+    generics: impl FnOnce(&CodegenCx<'ll, 'tcx>) -> SmallVec<Option<&'ll DIType>>,
 ) -> DINodeCreationResult<'ll> {
     assert_eq!(debug_context(cx).type_map.di_node_for_unique_id(stub_info.unique_type_id), None);
 
+    let mut _adt_stack_pop_guard = None;
+    if let UniqueTypeId::Ty(ty, ..) = stub_info.unique_type_id
+        && let ty::Adt(adt_def, args) = ty.kind()
+    {
+        let def_id = adt_def.did();
+        // If any subtype references the original type definition and that subtype has a type
+        // parameter that strictly contains the original parameter, the original type is a recursive
+        // type that can expand indefinitely. Example:
+        // ```
+        // enum Recursive<T> {
+        //     Recurse(*const Recursive<Wrap<T>>),
+        //     Item(T),
+        // }
+        // ```
+        let is_expanding_recursive =
+            debug_context(cx).adt_stack.borrow().iter().any(|(parent_def_id, parent_args)| {
+                if def_id == *parent_def_id {
+                    args.iter().zip(parent_args.iter()).any(|(arg, parent_arg)| {
+                        if let (Some(arg), Some(parent_arg)) = (arg.as_type(), parent_arg.as_type())
+                        {
+                            arg != parent_arg && arg.contains(parent_arg)
+                        } else {
+                            false
+                        }
+                    })
+                } else {
+                    false
+                }
+            });
+        if is_expanding_recursive {
+            // FIXME: indicate that this is an expanding recursive type in stub metadata?
+            return DINodeCreationResult::new(stub_info.metadata, false);
+        } else {
+            debug_context(cx).adt_stack.borrow_mut().push((def_id, args));
+            _adt_stack_pop_guard = Some(AdtStackPopGuard { cx });
+        }
+    }
+
     debug_context(cx).type_map.insert(stub_info.unique_type_id, stub_info.metadata);
 
     let members: SmallVec<_> =
         members(cx, stub_info.metadata).into_iter().map(|node| Some(node)).collect();
-    let generics: SmallVec<Option<&'ll DIType>> =
-        generics(cx).into_iter().map(|node| Some(node)).collect();
+    let generics = generics(cx);
 
     if !(members.is_empty() && generics.is_empty()) {
         unsafe {
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index 10819a53b1d..c5085927923 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -6,6 +6,7 @@ use std::sync::Arc;
 use std::{iter, ptr};
 
 use libc::c_uint;
+use metadata::create_subroutine_type;
 use rustc_abi::Size;
 use rustc_codegen_ssa::debuginfo::type_names;
 use rustc_codegen_ssa::mir::debuginfo::VariableKind::*;
@@ -34,8 +35,8 @@ use crate::builder::Builder;
 use crate::common::{AsCCharPtr, CodegenCx};
 use crate::llvm;
 use crate::llvm::debuginfo::{
-    DIArray, DIBuilderBox, DIFile, DIFlags, DILexicalBlock, DILocation, DISPFlags, DIScope, DIType,
-    DIVariable,
+    DIArray, DIBuilderBox, DIFile, DIFlags, DILexicalBlock, DILocation, DISPFlags, DIScope,
+    DITemplateTypeParameter, DIType, DIVariable,
 };
 use crate::value::Value;
 
@@ -65,6 +66,7 @@ pub(crate) struct CodegenUnitDebugContext<'ll, 'tcx> {
     created_files: RefCell<UnordMap<Option<(StableSourceFileId, SourceFileHash)>, &'ll DIFile>>,
 
     type_map: metadata::TypeMap<'ll, 'tcx>,
+    adt_stack: RefCell<Vec<(DefId, GenericArgsRef<'tcx>)>>,
     namespace_map: RefCell<DefIdMap<&'ll DIScope>>,
     recursion_marker_type: OnceCell<&'ll DIType>,
 }
@@ -79,6 +81,7 @@ impl<'ll, 'tcx> CodegenUnitDebugContext<'ll, 'tcx> {
             builder,
             created_files: Default::default(),
             type_map: Default::default(),
+            adt_stack: Default::default(),
             namespace_map: RefCell::new(Default::default()),
             recursion_marker_type: OnceCell::new(),
         }
@@ -251,7 +254,7 @@ struct DebugLoc {
     col: u32,
 }
 
-impl CodegenCx<'_, '_> {
+impl<'ll> CodegenCx<'ll, '_> {
     /// Looks up debug source information about a `BytePos`.
     // FIXME(eddyb) rename this to better indicate it's a duplicate of
     // `lookup_char_pos` rather than `dbg_loc`, perhaps by making
@@ -279,6 +282,22 @@ impl CodegenCx<'_, '_> {
             DebugLoc { file, line, col }
         }
     }
+
+    fn create_template_type_parameter(
+        &self,
+        name: &str,
+        actual_type_metadata: &'ll DIType,
+    ) -> &'ll DITemplateTypeParameter {
+        unsafe {
+            llvm::LLVMRustDIBuilderCreateTemplateTypeParameter(
+                DIB(self),
+                None,
+                name.as_c_char_ptr(),
+                name.len(),
+                actual_type_metadata,
+            )
+        }
+    }
 }
 
 impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
@@ -325,10 +344,8 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         let loc = self.lookup_debug_loc(span.lo());
         let file_metadata = file_metadata(self, &loc.file);
 
-        let function_type_metadata = unsafe {
-            let fn_signature = get_function_signature(self, fn_abi);
-            llvm::LLVMRustDIBuilderCreateSubroutineType(DIB(self), fn_signature)
-        };
+        let function_type_metadata =
+            create_subroutine_type(self, get_function_signature(self, fn_abi));
 
         let mut name = String::with_capacity(64);
         type_names::push_item_name(tcx, def_id, false, &mut name);
@@ -483,16 +500,10 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                         kind.as_type().map(|ty| {
                             let actual_type = cx.tcx.normalize_erasing_regions(cx.typing_env(), ty);
                             let actual_type_metadata = type_di_node(cx, actual_type);
-                            let name = name.as_str();
-                            unsafe {
-                                Some(llvm::LLVMRustDIBuilderCreateTemplateTypeParameter(
-                                    DIB(cx),
-                                    None,
-                                    name.as_c_char_ptr(),
-                                    name.len(),
-                                    actual_type_metadata,
-                                ))
-                            }
+                            Some(cx.create_template_type_parameter(
+                                name.as_str(),
+                                actual_type_metadata,
+                            ))
                         })
                     })
                     .collect()
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index 4c5a78ca74f..ecf108f988f 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -217,3 +217,7 @@ pub(crate) struct MismatchedDataLayout<'a> {
 pub(crate) struct FixedX18InvalidArch<'a> {
     pub arch: &'a str,
 }
+
+#[derive(Diagnostic)]
+#[diag(codegen_llvm_sanitizer_kcfi_arity_requires_llvm_21_0_0)]
+pub(crate) struct SanitizerKcfiArityRequiresLLVM2100;
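A hedged note: the `#[diag(...)]` slug above implies a matching Fluent message in this crate's diagnostic resources; the entry is expected to look roughly like the line below, though the exact wording is an assumption and not taken from this diff.

codegen_llvm_sanitizer_kcfi_arity_requires_llvm_21_0_0 = `-Zsanitizer-kcfi-arity` requires LLVM 21.0.0 or later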
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 67135fcc308..ffeab59b05c 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1184,18 +1184,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         }};
     }
 
-    /// Returns the bitwidth of the `$ty` argument if it is an `Int` type.
-    macro_rules! require_int_ty {
-        ($ty: expr, $diag: expr) => {
-            match $ty {
-                ty::Int(i) => i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
-                _ => {
-                    return_error!($diag);
-                }
-            }
-        };
-    }
-
     /// Returns the bitwidth of the `$ty` argument if it is an `Int` or `Uint` type.
     macro_rules! require_int_or_uint_ty {
         ($ty: expr, $diag: expr) => {
@@ -1421,7 +1409,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         return Ok(bx.shuffle_vector(args[0].immediate(), args[1].immediate(), indices));
     }
 
-    if name == sym::simd_insert {
+    if name == sym::simd_insert || name == sym::simd_insert_dyn {
         require!(
             in_elem == arg_tys[2],
             InvalidMonomorphization::InsertedType {
@@ -1432,40 +1420,49 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                 out_ty: arg_tys[2]
             }
         );
-        let idx = bx
-            .const_to_opt_u128(args[1].immediate(), false)
-            .expect("typeck should have ensure that this is a const");
-        if idx >= in_len.into() {
-            return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
-                span,
-                name,
-                arg_idx: 1,
-                total_len: in_len.into(),
-            });
-        }
-        return Ok(bx.insert_element(
-            args[0].immediate(),
-            args[2].immediate(),
-            bx.const_i32(idx as i32),
-        ));
+
+        let index_imm = if name == sym::simd_insert {
+            let idx = bx
+                .const_to_opt_u128(args[1].immediate(), false)
+                .expect("typeck should have ensure that this is a const");
+            if idx >= in_len.into() {
+                return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
+                    span,
+                    name,
+                    arg_idx: 1,
+                    total_len: in_len.into(),
+                });
+            }
+            bx.const_i32(idx as i32)
+        } else {
+            args[1].immediate()
+        };
+
+        return Ok(bx.insert_element(args[0].immediate(), args[2].immediate(), index_imm));
     }
-    if name == sym::simd_extract {
+    if name == sym::simd_extract || name == sym::simd_extract_dyn {
         require!(
             ret_ty == in_elem,
             InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
         );
-        let idx = bx
-            .const_to_opt_u128(args[1].immediate(), false)
-            .expect("typeck should have ensure that this is a const");
-        if idx >= in_len.into() {
-            return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
-                span,
-                name,
-                arg_idx: 1,
-                total_len: in_len.into(),
-            });
-        }
-        return Ok(bx.extract_element(args[0].immediate(), bx.const_i32(idx as i32)));
+        let index_imm = if name == sym::simd_extract {
+            let idx = bx
+                .const_to_opt_u128(args[1].immediate(), false)
+                .expect("typeck should have ensure that this is a const");
+            if idx >= in_len.into() {
+                return_error!(InvalidMonomorphization::SimdIndexOutOfBounds {
+                    span,
+                    name,
+                    arg_idx: 1,
+                    total_len: in_len.into(),
+                });
+            }
+            bx.const_i32(idx as i32)
+        } else {
+            args[1].immediate()
+        };
+
+        return Ok(bx.extract_element(args[0].immediate(), index_imm));
     }
 
     if name == sym::simd_select {
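A hedged illustration of the split above: the existing intrinsics keep their compile-time constant index and the front-end bounds check, while the new `_dyn` forms forward a runtime index directly to LLVM's insertelement/extractelement. The intrinsic names are from this diff; the call shapes are illustrative only.

// simd_insert(v, 2, x)       // index must be a constant; out-of-range values are rejected here
// simd_insert_dyn(v, i, x)   // index may be a runtime value; no front-end bounds check
// simd_extract(v, 3)         // constant index, checked against the lane count
// simd_extract_dyn(v, i)     // runtime index, lowered directly to extractelement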
@@ -1476,9 +1473,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             m_len == v_len,
             InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
         );
-        let in_elem_bitwidth = require_int_ty!(
+        let in_elem_bitwidth = require_int_or_uint_ty!(
             m_elem_ty.kind(),
-            InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: m_elem_ty }
         );
         let m_i1s = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, m_len);
         return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
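Hedged illustration of the relaxed mask check above: a `simd_select` mask element may now be any integer type, signed or unsigned, and non-integer elements report the unified `MaskWrongElementType` error. The concrete element types below are assumptions for illustration only.

// mask element i32  -> accepted before and after this change
// mask element u32  -> previously rejected by require_int_ty!, now accepted
// mask element f32  -> still rejected, now reported as MaskWrongElementType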
@@ -1499,7 +1496,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         // Integer vector <i{in_bitwidth} x in_len>:
         let in_elem_bitwidth = require_int_or_uint_ty!(
             in_elem.kind(),
-            InvalidMonomorphization::VectorArgument { span, name, in_ty, in_elem }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: in_elem }
         );
 
         let i1xn = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, in_len);
@@ -1723,14 +1720,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        let mask_elem_bitwidth = require_int_ty!(
+        let mask_elem_bitwidth = require_int_or_uint_ty!(
             element_ty2.kind(),
-            InvalidMonomorphization::ThirdArgElementType {
-                span,
-                name,
-                expected_element: element_ty2,
-                third_arg: arg_tys[2]
-            }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: element_ty2 }
         );
 
         // Alignment of T, must be a constant integer value:
@@ -1825,14 +1817,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        let m_elem_bitwidth = require_int_ty!(
+        let m_elem_bitwidth = require_int_or_uint_ty!(
             mask_elem.kind(),
-            InvalidMonomorphization::ThirdArgElementType {
-                span,
-                name,
-                expected_element: values_elem,
-                third_arg: mask_ty,
-            }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: mask_elem }
         );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
@@ -1915,14 +1902,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        let m_elem_bitwidth = require_int_ty!(
+        let m_elem_bitwidth = require_int_or_uint_ty!(
             mask_elem.kind(),
-            InvalidMonomorphization::ThirdArgElementType {
-                span,
-                name,
-                expected_element: values_elem,
-                third_arg: mask_ty,
-            }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: mask_elem }
         );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
@@ -2010,15 +1992,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        // The element type of the third argument must be a signed integer type of any width:
-        let mask_elem_bitwidth = require_int_ty!(
+        // The element type of the third argument must be an integer type of any width:
+        let mask_elem_bitwidth = require_int_or_uint_ty!(
             element_ty2.kind(),
-            InvalidMonomorphization::ThirdArgElementType {
-                span,
-                name,
-                expected_element: element_ty2,
-                third_arg: arg_tys[2]
-            }
+            InvalidMonomorphization::MaskWrongElementType { span, name, ty: element_ty2 }
         );
 
         // Alignment of T, must be a constant integer value:
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index f622646a5d9..425381b0ffa 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -29,7 +29,7 @@ use back::owned_target_machine::OwnedTargetMachine;
 use back::write::{create_informational_target_machine, create_target_machine};
 use context::SimpleCx;
 use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig};
-pub(crate) use llvm_util::target_features_cfg;
+use llvm_util::target_features_cfg;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
@@ -71,9 +71,7 @@ mod debuginfo;
 mod declare;
 mod errors;
 mod intrinsic;
-// FIXME(Zalathar): Fix all the unreachable-pub warnings that would occur if
-// this isn't pub, then make it not pub.
-pub mod llvm;
+mod llvm;
 mod llvm_util;
 mod mono_item;
 mod type_;
diff --git a/compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs b/compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs
index 63b2b15c514..51bcc4d123d 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs
@@ -5,17 +5,17 @@ use std::{slice, str};
 
 use rustc_fs_util::path_to_c_string;
 
-pub struct ArchiveRO {
+pub(crate) struct ArchiveRO {
     pub raw: &'static mut super::Archive,
 }
 
 unsafe impl Send for ArchiveRO {}
 
-pub struct Iter<'a> {
+pub(crate) struct Iter<'a> {
     raw: &'a mut super::ArchiveIterator<'a>,
 }
 
-pub struct Child<'a> {
+pub(crate) struct Child<'a> {
     pub raw: &'a mut super::ArchiveChild<'a>,
 }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
index 11043b664f5..0e0f2b0eab0 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
@@ -3,13 +3,13 @@
 use libc::c_uint;
 use rustc_span::InnerSpan;
 
-pub use self::Diagnostic::*;
-pub use self::OptimizationDiagnosticKind::*;
+pub(crate) use self::Diagnostic::*;
+use self::OptimizationDiagnosticKind::*;
 use super::{DiagnosticInfo, SMDiagnostic};
 use crate::value::Value;
 
 #[derive(Copy, Clone, Debug)]
-pub enum OptimizationDiagnosticKind {
+pub(crate) enum OptimizationDiagnosticKind {
     OptimizationRemark,
     OptimizationMissed,
     OptimizationAnalysis,
@@ -19,9 +19,10 @@ pub enum OptimizationDiagnosticKind {
     OptimizationRemarkOther,
 }
 
-pub struct OptimizationDiagnostic<'ll> {
+pub(crate) struct OptimizationDiagnostic<'ll> {
     pub kind: OptimizationDiagnosticKind,
     pub pass_name: String,
+    #[expect(dead_code)]
     pub function: &'ll Value,
     pub line: c_uint,
     pub column: c_uint,
@@ -73,14 +74,14 @@ impl<'ll> OptimizationDiagnostic<'ll> {
     }
 }
 
-pub struct SrcMgrDiagnostic {
+pub(crate) struct SrcMgrDiagnostic {
     pub level: super::DiagnosticLevel,
     pub message: String,
     pub source: Option<(String, Vec<InnerSpan>)>,
 }
 
 impl SrcMgrDiagnostic {
-    pub unsafe fn unpack(diag: &SMDiagnostic) -> SrcMgrDiagnostic {
+    pub(crate) unsafe fn unpack(diag: &SMDiagnostic) -> SrcMgrDiagnostic {
         // Recover the post-substitution assembly code from LLVM for better
         // diagnostics.
         let mut have_source = false;
@@ -120,7 +121,7 @@ impl SrcMgrDiagnostic {
 }
 
 #[derive(Clone)]
-pub struct InlineAsmDiagnostic {
+pub(crate) struct InlineAsmDiagnostic {
     pub level: super::DiagnosticLevel,
     pub cookie: u64,
     pub message: String,
@@ -158,7 +159,7 @@ impl InlineAsmDiagnostic {
     }
 }
 
-pub enum Diagnostic<'ll> {
+pub(crate) enum Diagnostic<'ll> {
     Optimization(OptimizationDiagnostic<'ll>),
     InlineAsm(InlineAsmDiagnostic),
     PGO(&'ll DiagnosticInfo),
@@ -166,11 +167,12 @@ pub enum Diagnostic<'ll> {
     Unsupported(&'ll DiagnosticInfo),
 
     /// LLVM has other types that we do not wrap here.
+    #[expect(dead_code)]
     UnknownDiagnostic(&'ll DiagnosticInfo),
 }
 
 impl<'ll> Diagnostic<'ll> {
-    pub unsafe fn unpack(di: &'ll DiagnosticInfo) -> Self {
+    pub(crate) unsafe fn unpack(di: &'ll DiagnosticInfo) -> Self {
         use super::DiagnosticKind as Dk;
 
         unsafe {
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index f6b23862907..a9b3bdf7344 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -4,7 +4,7 @@
 use libc::{c_char, c_uint};
 
 use super::MetadataKindId;
-use super::ffi::{BasicBlock, Metadata, Module, Type, Value};
+use super::ffi::{AttributeKind, BasicBlock, Metadata, Module, Type, Value};
 use crate::llvm::Bool;
 
 #[link(name = "llvm-wrapper", kind = "static")]
@@ -17,6 +17,8 @@ unsafe extern "C" {
     pub(crate) fn LLVMRustEraseInstFromParent(V: &Value);
     pub(crate) fn LLVMRustGetTerminator<'a>(B: &BasicBlock) -> &'a Value;
     pub(crate) fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
+    pub(crate) fn LLVMRustHasAttributeAtIndex(V: &Value, i: c_uint, Kind: AttributeKind) -> bool;
+    pub(crate) fn LLVMRustGetArrayNumElements(Ty: &Type) -> u64;
 }
 
 unsafe extern "C" {
@@ -31,20 +33,20 @@ unsafe extern "C" {
 
 #[repr(C)]
 #[derive(Copy, Clone, PartialEq)]
-pub enum LLVMRustVerifierFailureAction {
+pub(crate) enum LLVMRustVerifierFailureAction {
     LLVMAbortProcessAction = 0,
     LLVMPrintMessageAction = 1,
     LLVMReturnStatusAction = 2,
 }
 
 #[cfg(llvm_enzyme)]
-pub use self::Enzyme_AD::*;
+pub(crate) use self::Enzyme_AD::*;
 
 #[cfg(llvm_enzyme)]
-pub mod Enzyme_AD {
+pub(crate) mod Enzyme_AD {
     use libc::c_void;
     unsafe extern "C" {
-        pub fn EnzymeSetCLBool(arg1: *mut ::std::os::raw::c_void, arg2: u8);
+        pub(crate) fn EnzymeSetCLBool(arg1: *mut ::std::os::raw::c_void, arg2: u8);
     }
     unsafe extern "C" {
         static mut EnzymePrintPerf: c_void;
@@ -56,42 +58,42 @@ pub mod Enzyme_AD {
         static mut EnzymeInline: c_void;
         static mut RustTypeRules: c_void;
     }
-    pub fn set_print_perf(print: bool) {
+    pub(crate) fn set_print_perf(print: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintPerf), print as u8);
         }
     }
-    pub fn set_print_activity(print: bool) {
+    pub(crate) fn set_print_activity(print: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintActivity), print as u8);
         }
     }
-    pub fn set_print_type(print: bool) {
+    pub(crate) fn set_print_type(print: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintType), print as u8);
         }
     }
-    pub fn set_print(print: bool) {
+    pub(crate) fn set_print(print: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrint), print as u8);
         }
     }
-    pub fn set_strict_aliasing(strict: bool) {
+    pub(crate) fn set_strict_aliasing(strict: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymeStrictAliasing), strict as u8);
         }
     }
-    pub fn set_loose_types(loose: bool) {
+    pub(crate) fn set_loose_types(loose: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(looseTypeAnalysis), loose as u8);
         }
     }
-    pub fn set_inline(val: bool) {
+    pub(crate) fn set_inline(val: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymeInline), val as u8);
         }
     }
-    pub fn set_rust_rules(val: bool) {
+    pub(crate) fn set_rust_rules(val: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(RustTypeRules), val as u8);
         }
@@ -99,34 +101,34 @@ pub mod Enzyme_AD {
 }
 
 #[cfg(not(llvm_enzyme))]
-pub use self::Fallback_AD::*;
+pub(crate) use self::Fallback_AD::*;
 
 #[cfg(not(llvm_enzyme))]
-pub mod Fallback_AD {
+pub(crate) mod Fallback_AD {
     #![allow(unused_variables)]
 
-    pub fn set_inline(val: bool) {
+    pub(crate) fn set_inline(val: bool) {
         unimplemented!()
     }
-    pub fn set_print_perf(print: bool) {
+    pub(crate) fn set_print_perf(print: bool) {
         unimplemented!()
     }
-    pub fn set_print_activity(print: bool) {
+    pub(crate) fn set_print_activity(print: bool) {
         unimplemented!()
     }
-    pub fn set_print_type(print: bool) {
+    pub(crate) fn set_print_type(print: bool) {
         unimplemented!()
     }
-    pub fn set_print(print: bool) {
+    pub(crate) fn set_print(print: bool) {
         unimplemented!()
     }
-    pub fn set_strict_aliasing(strict: bool) {
+    pub(crate) fn set_strict_aliasing(strict: bool) {
         unimplemented!()
     }
-    pub fn set_loose_types(loose: bool) {
+    pub(crate) fn set_loose_types(loose: bool) {
         unimplemented!()
     }
-    pub fn set_rust_rules(val: bool) {
+    pub(crate) fn set_rust_rules(val: bool) {
         unimplemented!()
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 39087a4d6f4..9ff04f72903 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -32,10 +32,10 @@ use crate::llvm;
 
 /// In the LLVM-C API, boolean values are passed as `typedef int LLVMBool`,
 /// which has a different ABI from Rust or C++ `bool`.
-pub type Bool = c_int;
+pub(crate) type Bool = c_int;
 
-pub const True: Bool = 1 as Bool;
-pub const False: Bool = 0 as Bool;
+pub(crate) const True: Bool = 1 as Bool;
+pub(crate) const False: Bool = 0 as Bool;
 
 /// Wrapper for a raw enum value returned from LLVM's C APIs.
 ///
@@ -44,7 +44,7 @@ pub const False: Bool = 0 as Bool;
 /// value and returns it. Instead, return this raw wrapper, then convert to the
 /// Rust-side enum explicitly.
 #[repr(transparent)]
-pub struct RawEnum<T> {
+pub(crate) struct RawEnum<T> {
     value: u32,
     /// We don't own or consume a `T`, but we can produce one.
     _rust_side_type: PhantomData<fn() -> T>,
@@ -64,7 +64,7 @@ impl<T: TryFrom<u32>> RawEnum<T> {
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
 #[allow(dead_code)] // Variants constructed by C++.
-pub enum LLVMRustResult {
+pub(crate) enum LLVMRustResult {
     Success,
     Failure,
 }
@@ -83,7 +83,7 @@ pub enum LLVMRustResult {
 /// C++ API.
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum ModuleFlagMergeBehavior {
+pub(crate) enum ModuleFlagMergeBehavior {
     Error = 1,
     Warning = 2,
     Require = 3,
@@ -101,7 +101,7 @@ pub enum ModuleFlagMergeBehavior {
 /// See <https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/IR/CallingConv.h>
 #[derive(Copy, Clone, PartialEq, Debug, TryFromU32)]
 #[repr(C)]
-pub enum CallConv {
+pub(crate) enum CallConv {
     CCallConv = 0,
     FastCallConv = 8,
     ColdCallConv = 9,
@@ -126,7 +126,7 @@ pub enum CallConv {
 /// Must match the layout of `LLVMLinkage`.
 #[derive(Copy, Clone, PartialEq, TryFromU32)]
 #[repr(C)]
-pub enum Linkage {
+pub(crate) enum Linkage {
     ExternalLinkage = 0,
     AvailableExternallyLinkage = 1,
     LinkOnceAnyLinkage = 2,
@@ -153,7 +153,7 @@ pub enum Linkage {
 /// Must match the layout of `LLVMVisibility`.
 #[repr(C)]
 #[derive(Copy, Clone, PartialEq, TryFromU32)]
-pub enum Visibility {
+pub(crate) enum Visibility {
     Default = 0,
     Hidden = 1,
     Protected = 2,
@@ -171,8 +171,9 @@ impl Visibility {
 
 /// LLVMUnnamedAddr
 #[repr(C)]
-pub enum UnnamedAddr {
+pub(crate) enum UnnamedAddr {
     No,
+    #[expect(dead_code)]
     Local,
     Global,
 }
@@ -180,7 +181,7 @@ pub enum UnnamedAddr {
 /// LLVMDLLStorageClass
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum DLLStorageClass {
+pub(crate) enum DLLStorageClass {
     #[allow(dead_code)]
     Default = 0,
     DllImport = 1, // Function to be imported from DLL.
@@ -193,7 +194,8 @@ pub enum DLLStorageClass {
 /// though it is not ABI compatible (since it's a C++ enum)
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
-pub enum AttributeKind {
+#[expect(dead_code, reason = "Some variants are unused, but are kept to match the C++")]
+pub(crate) enum AttributeKind {
     AlwaysInline = 0,
     ByVal = 1,
     Cold = 2,
@@ -241,7 +243,7 @@ pub enum AttributeKind {
 /// LLVMIntPredicate
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum IntPredicate {
+pub(crate) enum IntPredicate {
     IntEQ = 32,
     IntNE = 33,
     IntUGT = 34,
@@ -275,7 +277,7 @@ impl IntPredicate {
 /// LLVMRealPredicate
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum RealPredicate {
+pub(crate) enum RealPredicate {
     RealPredicateFalse = 0,
     RealOEQ = 1,
     RealOGT = 2,
@@ -321,7 +323,8 @@ impl RealPredicate {
 /// LLVMTypeKind
 #[derive(Copy, Clone, PartialEq, Debug)]
 #[repr(C)]
-pub enum TypeKind {
+#[expect(dead_code, reason = "Some variants are unused, but are kept to match LLVM-C")]
+pub(crate) enum TypeKind {
     Void = 0,
     Half = 1,
     Float = 2,
@@ -373,7 +376,7 @@ impl TypeKind {
 /// LLVMAtomicRmwBinOp
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum AtomicRmwBinOp {
+pub(crate) enum AtomicRmwBinOp {
     AtomicXchg = 0,
     AtomicAdd = 1,
     AtomicSub = 2,
@@ -409,7 +412,7 @@ impl AtomicRmwBinOp {
 /// LLVMAtomicOrdering
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum AtomicOrdering {
+pub(crate) enum AtomicOrdering {
     #[allow(dead_code)]
     NotAtomic = 0,
     Unordered = 1,
@@ -438,7 +441,7 @@ impl AtomicOrdering {
 /// LLVMRustFileType
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum FileType {
+pub(crate) enum FileType {
     AssemblyFile,
     ObjectFile,
 }
@@ -446,7 +449,8 @@ pub enum FileType {
 /// LLVMMetadataType
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum MetadataType {
+#[expect(dead_code, reason = "Some variants are unused, but are kept to match LLVM-C")]
+pub(crate) enum MetadataType {
     MD_dbg = 0,
     MD_tbaa = 1,
     MD_prof = 2,
@@ -470,7 +474,7 @@ pub enum MetadataType {
 /// LLVMRustAsmDialect
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum AsmDialect {
+pub(crate) enum AsmDialect {
     Att,
     Intel,
 }
@@ -478,7 +482,7 @@ pub enum AsmDialect {
 /// LLVMRustCodeGenOptLevel
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum CodeGenOptLevel {
+pub(crate) enum CodeGenOptLevel {
     None,
     Less,
     Default,
@@ -487,7 +491,7 @@ pub enum CodeGenOptLevel {
 
 /// LLVMRustPassBuilderOptLevel
 #[repr(C)]
-pub enum PassBuilderOptLevel {
+pub(crate) enum PassBuilderOptLevel {
     O0,
     O1,
     O2,
@@ -499,7 +503,7 @@ pub enum PassBuilderOptLevel {
 /// LLVMRustOptStage
 #[derive(PartialEq)]
 #[repr(C)]
-pub enum OptStage {
+pub(crate) enum OptStage {
     PreLinkNoLTO,
     PreLinkThinLTO,
     PreLinkFatLTO,
@@ -509,7 +513,7 @@ pub enum OptStage {
 
 /// LLVMRustSanitizerOptions
 #[repr(C)]
-pub struct SanitizerOptions {
+pub(crate) struct SanitizerOptions {
     pub sanitize_address: bool,
     pub sanitize_address_recover: bool,
     pub sanitize_cfi: bool,
@@ -530,7 +534,7 @@ pub struct SanitizerOptions {
 /// LLVMRustRelocModel
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum RelocModel {
+pub(crate) enum RelocModel {
     Static,
     PIC,
     DynamicNoPic,
@@ -542,7 +546,7 @@ pub enum RelocModel {
 /// LLVMRustFloatABI
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum FloatAbi {
+pub(crate) enum FloatAbi {
     Default,
     Soft,
     Hard,
@@ -551,7 +555,7 @@ pub enum FloatAbi {
 /// LLVMRustCodeModel
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum CodeModel {
+pub(crate) enum CodeModel {
     Tiny,
     Small,
     Kernel,
@@ -564,7 +568,7 @@ pub enum CodeModel {
 #[derive(Copy, Clone)]
 #[repr(C)]
 #[allow(dead_code)] // Variants constructed by C++.
-pub enum DiagnosticKind {
+pub(crate) enum DiagnosticKind {
     Other,
     InlineAsm,
     StackSize,
@@ -587,7 +591,7 @@ pub enum DiagnosticKind {
 #[derive(Copy, Clone)]
 #[repr(C)]
 #[allow(dead_code)] // Variants constructed by C++.
-pub enum DiagnosticLevel {
+pub(crate) enum DiagnosticLevel {
     Error,
     Warning,
     Note,
@@ -597,7 +601,7 @@ pub enum DiagnosticLevel {
 /// LLVMRustArchiveKind
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum ArchiveKind {
+pub(crate) enum ArchiveKind {
     K_GNU,
     K_BSD,
     K_DARWIN,
@@ -607,15 +611,15 @@ pub enum ArchiveKind {
 
 unsafe extern "C" {
     // LLVMRustThinLTOData
-    pub type ThinLTOData;
+    pub(crate) type ThinLTOData;
 
     // LLVMRustThinLTOBuffer
-    pub type ThinLTOBuffer;
+    pub(crate) type ThinLTOBuffer;
 }
 
 /// LLVMRustThinLTOModule
 #[repr(C)]
-pub struct ThinLTOModule {
+pub(crate) struct ThinLTOModule {
     pub identifier: *const c_char,
     pub data: *const u8,
     pub len: usize,
@@ -624,7 +628,8 @@ pub struct ThinLTOModule {
 /// LLVMThreadLocalMode
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum ThreadLocalMode {
+pub(crate) enum ThreadLocalMode {
+    #[expect(dead_code)]
     NotThreadLocal,
     GeneralDynamic,
     LocalDynamic,
@@ -635,7 +640,7 @@ pub enum ThreadLocalMode {
 /// LLVMRustChecksumKind
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum ChecksumKind {
+pub(crate) enum ChecksumKind {
     None,
     MD5,
     SHA1,
@@ -645,7 +650,7 @@ pub enum ChecksumKind {
 /// LLVMRustMemoryEffects
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum MemoryEffects {
+pub(crate) enum MemoryEffects {
     None,
     ReadOnly,
     InaccessibleMemOnly,
@@ -654,7 +659,8 @@ pub enum MemoryEffects {
 /// LLVMOpcode
 #[derive(Copy, Clone, PartialEq, Eq)]
 #[repr(C)]
-pub enum Opcode {
+#[expect(dead_code, reason = "Some variants are unused, but are kept to match LLVM-C")]
+pub(crate) enum Opcode {
     Ret = 1,
     Br = 2,
     Switch = 3,
@@ -735,48 +741,48 @@ struct InvariantOpaque<'a> {
 
 // Opaque pointer types
 unsafe extern "C" {
-    pub type Module;
-    pub type Context;
-    pub type Type;
-    pub type Value;
-    pub type ConstantInt;
-    pub type Attribute;
-    pub type Metadata;
-    pub type BasicBlock;
-    pub type Comdat;
+    pub(crate) type Module;
+    pub(crate) type Context;
+    pub(crate) type Type;
+    pub(crate) type Value;
+    pub(crate) type ConstantInt;
+    pub(crate) type Attribute;
+    pub(crate) type Metadata;
+    pub(crate) type BasicBlock;
+    pub(crate) type Comdat;
 }
 #[repr(C)]
-pub struct Builder<'a>(InvariantOpaque<'a>);
+pub(crate) struct Builder<'a>(InvariantOpaque<'a>);
 #[repr(C)]
-pub struct PassManager<'a>(InvariantOpaque<'a>);
+pub(crate) struct PassManager<'a>(InvariantOpaque<'a>);
 unsafe extern "C" {
     pub type TargetMachine;
-    pub type Archive;
+    pub(crate) type Archive;
 }
 #[repr(C)]
-pub struct ArchiveIterator<'a>(InvariantOpaque<'a>);
+pub(crate) struct ArchiveIterator<'a>(InvariantOpaque<'a>);
 #[repr(C)]
-pub struct ArchiveChild<'a>(InvariantOpaque<'a>);
+pub(crate) struct ArchiveChild<'a>(InvariantOpaque<'a>);
 unsafe extern "C" {
-    pub type Twine;
-    pub type DiagnosticInfo;
-    pub type SMDiagnostic;
+    pub(crate) type Twine;
+    pub(crate) type DiagnosticInfo;
+    pub(crate) type SMDiagnostic;
 }
 #[repr(C)]
-pub struct RustArchiveMember<'a>(InvariantOpaque<'a>);
+pub(crate) struct RustArchiveMember<'a>(InvariantOpaque<'a>);
 /// Opaque pointee of `LLVMOperandBundleRef`.
 #[repr(C)]
 pub(crate) struct OperandBundle<'a>(InvariantOpaque<'a>);
 #[repr(C)]
-pub struct Linker<'a>(InvariantOpaque<'a>);
+pub(crate) struct Linker<'a>(InvariantOpaque<'a>);
 
 unsafe extern "C" {
-    pub type DiagnosticHandler;
+    pub(crate) type DiagnosticHandler;
 }
 
-pub type DiagnosticHandlerTy = unsafe extern "C" fn(&DiagnosticInfo, *mut c_void);
+pub(crate) type DiagnosticHandlerTy = unsafe extern "C" fn(&DiagnosticInfo, *mut c_void);
 
-pub mod debuginfo {
+pub(crate) mod debuginfo {
     use std::ptr;
 
     use bitflags::bitflags;
@@ -793,7 +799,7 @@ pub mod debuginfo {
     /// builder reference typically has a shorter lifetime than the LLVM
     /// session (`'ll`) that it participates in.
     #[repr(C)]
-    pub struct DIBuilder<'ll>(InvariantOpaque<'ll>);
+    pub(crate) struct DIBuilder<'ll>(InvariantOpaque<'ll>);
 
     /// Owning pointer to a `DIBuilder<'ll>` that will dispose of the builder
     /// when dropped. Use `.as_ref()` to get the underlying `&DIBuilder`
@@ -822,22 +828,22 @@ pub mod debuginfo {
         }
     }
 
-    pub type DIDescriptor = Metadata;
-    pub type DILocation = Metadata;
-    pub type DIScope = DIDescriptor;
-    pub type DIFile = DIScope;
-    pub type DILexicalBlock = DIScope;
-    pub type DISubprogram = DIScope;
-    pub type DIType = DIDescriptor;
-    pub type DIBasicType = DIType;
-    pub type DIDerivedType = DIType;
-    pub type DICompositeType = DIDerivedType;
-    pub type DIVariable = DIDescriptor;
-    pub type DIGlobalVariableExpression = DIDescriptor;
-    pub type DIArray = DIDescriptor;
-    pub type DISubrange = DIDescriptor;
-    pub type DIEnumerator = DIDescriptor;
-    pub type DITemplateTypeParameter = DIDescriptor;
+    pub(crate) type DIDescriptor = Metadata;
+    pub(crate) type DILocation = Metadata;
+    pub(crate) type DIScope = DIDescriptor;
+    pub(crate) type DIFile = DIScope;
+    pub(crate) type DILexicalBlock = DIScope;
+    pub(crate) type DISubprogram = DIScope;
+    pub(crate) type DIType = DIDescriptor;
+    pub(crate) type DIBasicType = DIType;
+    pub(crate) type DIDerivedType = DIType;
+    pub(crate) type DICompositeType = DIDerivedType;
+    pub(crate) type DIVariable = DIDescriptor;
+    pub(crate) type DIGlobalVariableExpression = DIDescriptor;
+    pub(crate) type DIArray = DIDescriptor;
+    pub(crate) type DISubrange = DIDescriptor;
+    pub(crate) type DIEnumerator = DIDescriptor;
+    pub(crate) type DITemplateTypeParameter = DIDescriptor;
 
     bitflags! {
         /// Must match the layout of `LLVMDIFlags` in the LLVM-C API.
@@ -846,7 +852,7 @@ pub mod debuginfo {
         /// assertions in `RustWrapper.cpp` used by `fromRust(LLVMDIFlags)`.
         #[repr(transparent)]
         #[derive(Clone, Copy, Default)]
-        pub struct DIFlags: u32 {
+        pub(crate) struct DIFlags: u32 {
             const FlagZero                = 0;
             const FlagPrivate             = 1;
             const FlagProtected           = 2;
@@ -886,7 +892,7 @@ pub mod debuginfo {
     bitflags! {
         #[repr(transparent)]
         #[derive(Clone, Copy, Default)]
-        pub struct DISPFlags: u32 {
+        pub(crate) struct DISPFlags: u32 {
             const SPFlagZero              = 0;
             const SPFlagVirtual           = 1;
             const SPFlagPureVirtual       = 2;
@@ -900,7 +906,7 @@ pub mod debuginfo {
     /// LLVMRustDebugEmissionKind
     #[derive(Copy, Clone)]
     #[repr(C)]
-    pub enum DebugEmissionKind {
+    pub(crate) enum DebugEmissionKind {
         NoDebug,
         FullDebug,
         LineTablesOnly,
@@ -932,8 +938,9 @@ pub mod debuginfo {
     /// LLVMRustDebugNameTableKind
     #[derive(Clone, Copy)]
     #[repr(C)]
-    pub enum DebugNameTableKind {
+    pub(crate) enum DebugNameTableKind {
         Default,
+        #[expect(dead_code)]
         Gnu,
         None,
     }
@@ -943,7 +950,7 @@ pub mod debuginfo {
 bitflags! {
     #[repr(transparent)]
     #[derive(Default)]
-    pub struct AllocKindFlags : u64 {
+    pub(crate) struct AllocKindFlags : u64 {
         const Unknown = 0;
         const Alloc = 1;
         const Realloc = 1 << 1;
@@ -966,19 +973,20 @@ bitflags! {
 }
 
 unsafe extern "C" {
-    pub type ModuleBuffer;
+    pub(crate) type ModuleBuffer;
 }
 
-pub type SelfProfileBeforePassCallback =
+pub(crate) type SelfProfileBeforePassCallback =
     unsafe extern "C" fn(*mut c_void, *const c_char, *const c_char);
-pub type SelfProfileAfterPassCallback = unsafe extern "C" fn(*mut c_void);
+pub(crate) type SelfProfileAfterPassCallback = unsafe extern "C" fn(*mut c_void);
 
-pub type GetSymbolsCallback = unsafe extern "C" fn(*mut c_void, *const c_char) -> *mut c_void;
-pub type GetSymbolsErrorCallback = unsafe extern "C" fn(*const c_char) -> *mut c_void;
+pub(crate) type GetSymbolsCallback =
+    unsafe extern "C" fn(*mut c_void, *const c_char) -> *mut c_void;
+pub(crate) type GetSymbolsErrorCallback = unsafe extern "C" fn(*const c_char) -> *mut c_void;
 
 #[derive(Copy, Clone)]
 #[repr(transparent)]
-pub struct MetadataKindId(c_uint);
+pub(crate) struct MetadataKindId(c_uint);
 
 impl From<MetadataType> for MetadataKindId {
     fn from(value: MetadataType) -> Self {
@@ -1172,7 +1180,7 @@ unsafe extern "C" {
 
     // Operations on parameters
     pub(crate) fn LLVMIsAArgument(Val: &Value) -> Option<&Value>;
-    pub(crate) fn LLVMCountParams(Fn: &Value) -> c_uint;
+    pub(crate) safe fn LLVMCountParams(Fn: &Value) -> c_uint;
     pub(crate) fn LLVMGetParam(Fn: &Value, Index: c_uint) -> &Value;
 
     // Operations on basic blocks
@@ -2019,6 +2027,8 @@ unsafe extern "C" {
         NumExpressions: size_t,
         CodeRegions: *const crate::coverageinfo::ffi::CodeRegion,
         NumCodeRegions: size_t,
+        ExpansionRegions: *const crate::coverageinfo::ffi::ExpansionRegion,
+        NumExpansionRegions: size_t,
         BranchRegions: *const crate::coverageinfo::ffi::BranchRegion,
         NumBranchRegions: size_t,
         MCDCBranchRegions: *const crate::coverageinfo::ffi::MCDCBranchRegion,
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index a36226b25a2..6ca81c651ed 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -9,18 +9,18 @@ use libc::c_uint;
 use rustc_abi::{Align, Size, WrappingRange};
 use rustc_llvm::RustString;
 
-pub use self::CallConv::*;
-pub use self::CodeGenOptSize::*;
-pub use self::MetadataType::*;
-pub use self::ffi::*;
+pub(crate) use self::CallConv::*;
+pub(crate) use self::CodeGenOptSize::*;
+pub(crate) use self::MetadataType::*;
+pub(crate) use self::ffi::*;
 use crate::common::AsCCharPtr;
 
-pub mod archive_ro;
-pub mod diagnostic;
-pub mod enzyme_ffi;
+pub(crate) mod archive_ro;
+pub(crate) mod diagnostic;
+pub(crate) mod enzyme_ffi;
 mod ffi;
 
-pub use self::enzyme_ffi::*;
+pub(crate) use self::enzyme_ffi::*;
 
 impl LLVMRustResult {
     pub(crate) fn into_result(self) -> Result<(), ()> {
@@ -127,7 +127,7 @@ pub(crate) fn CreateRangeAttr(llcx: &Context, size: Size, range: WrappingRange)
 }
 
 #[derive(Copy, Clone)]
-pub enum AttributePlace {
+pub(crate) enum AttributePlace {
     ReturnValue,
     Argument(u32),
     Function,
@@ -145,7 +145,7 @@ impl AttributePlace {
 
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum CodeGenOptSize {
+pub(crate) enum CodeGenOptSize {
     CodeGenOptSizeNone = 0,
     CodeGenOptSizeDefault = 1,
     CodeGenOptSizeAggressive = 2,
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 4a166b0872d..36e35f81392 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -256,7 +256,6 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("aarch64", "pmuv3") => Some(LLVMFeature::new("perfmon")),
         ("aarch64", "paca") => Some(LLVMFeature::new("pauth")),
         ("aarch64", "pacg") => Some(LLVMFeature::new("pauth")),
-        ("aarch64", "pauth-lr") if get_version().0 < 19 => None,
         // Before LLVM 20 those two features were packaged together as b16b16
         ("aarch64", "sve-b16b16") if get_version().0 < 20 => Some(LLVMFeature::new("b16b16")),
         ("aarch64", "sme-b16b16") if get_version().0 < 20 => Some(LLVMFeature::new("b16b16")),
@@ -270,18 +269,9 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("aarch64", "fhm") => Some(LLVMFeature::new("fp16fml")),
         ("aarch64", "fp16") => Some(LLVMFeature::new("fullfp16")),
         // Filter out features that are not supported by the current LLVM version
-        ("aarch64", "fpmr") if get_version().0 != 18 => None,
+        ("aarch64", "fpmr") => None, // only existed in 18
         ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
-        // In LLVM 18, `unaligned-scalar-mem` was merged with `unaligned-vector-mem` into a single
-        // feature called `fast-unaligned-access`. In LLVM 19, it was split back out.
-        ("riscv32" | "riscv64", "unaligned-scalar-mem") if get_version().0 == 18 => {
-            Some(LLVMFeature::new("fast-unaligned-access"))
-        }
         // Filter out features that are not supported by the current LLVM version
-        ("riscv32" | "riscv64", "zaamo") if get_version().0 < 19 => None,
-        ("riscv32" | "riscv64", "zabha") if get_version().0 < 19 => None,
-        ("riscv32" | "riscv64", "zalrsc") if get_version().0 < 19 => None,
-        ("riscv32" | "riscv64", "zama16b") if get_version().0 < 19 => None,
         ("riscv32" | "riscv64", "zacas") if get_version().0 < 20 => None,
         // Enable the evex512 target feature if an avx512 target feature is enabled.
         ("x86", s) if s.starts_with("avx512") => {
@@ -293,11 +283,17 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("sparc", "leoncasa") => Some(LLVMFeature::new("hasleoncasa")),
         // In LLVM 19, there is no `v8plus` feature and `v9` means "SPARC-V9 instruction available and SPARC-V8+ ABI used".
         // https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp#L27-L28
-        // Before LLVM 19, there is no `v8plus` feature and `v9` means "SPARC-V9 instruction available".
+        // Before LLVM 19, there was no `v8plus` feature and `v9` meant "SPARC-V9 instruction available".
         // https://github.com/llvm/llvm-project/blob/llvmorg-18.1.0/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp#L26
         ("sparc", "v8plus") if get_version().0 == 19 => Some(LLVMFeature::new("v9")),
-        ("sparc", "v8plus") if get_version().0 < 19 => None,
         ("powerpc", "power8-crypto") => Some(LLVMFeature::new("crypto")),
+        // These new `amx` variants and `movrs` were introduced in LLVM 20
+        ("x86", "amx-avx512" | "amx-fp8" | "amx-movrs" | "amx-tf32" | "amx-transpose")
+            if get_version().0 < 20 =>
+        {
+            None
+        }
+        ("x86", "movrs") if get_version().0 < 20 => None,
         (_, s) => Some(LLVMFeature::new(s)),
     }
 }
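
Aside (not part of the patch): the arms above all follow one pattern of renaming a Rust target feature to its LLVM spelling and hiding features the linked LLVM is too old to know about. A minimal, self-contained sketch of that pattern, with `llvm_major` standing in for `get_version().0` and plain strings in place of `LLVMFeature`:

    // Illustrative only: translate an (arch, rust feature) pair to an LLVM
    // feature name, dropping features the linked LLVM does not support yet.
    fn map_feature<'a>(arch: &str, feature: &'a str, llvm_major: u32) -> Option<&'a str> {
        match (arch, feature) {
            // Renamed feature: always translate.
            ("aarch64", "fp16") => Some("fullfp16"),
            // Only understood by LLVM 20 and newer: hide it from older LLVMs.
            ("x86", "movrs") if llvm_major < 20 => None,
            // Everything else passes through under its Rust name.
            (_, f) => Some(f),
        }
    }

    fn main() {
        assert_eq!(map_feature("aarch64", "fp16", 19), Some("fullfp16"));
        assert_eq!(map_feature("x86", "movrs", 19), None);
        assert_eq!(map_feature("x86", "movrs", 20), Some("movrs"));
    }
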
diff --git a/compiler/rustc_codegen_llvm/src/mono_item.rs b/compiler/rustc_codegen_llvm/src/mono_item.rs
index a64627eaf59..fdf62a08065 100644
--- a/compiler/rustc_codegen_llvm/src/mono_item.rs
+++ b/compiler/rustc_codegen_llvm/src/mono_item.rs
@@ -120,7 +120,7 @@ impl CodegenCx<'_, '_> {
         }
 
         // Match clang by only supporting COFF and ELF for now.
-        if self.tcx.sess.target.is_like_osx {
+        if self.tcx.sess.target.is_like_darwin {
             return false;
         }
 
diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
index 8baa69cefe1..c216f0f4a09 100644
--- a/compiler/rustc_codegen_llvm/src/va_arg.rs
+++ b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -399,7 +399,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
             emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(8).unwrap(), false)
         }
         // macOS / iOS AArch64
-        "aarch64" if target.is_like_osx => {
+        "aarch64" if target.is_like_darwin => {
             emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(8).unwrap(), true)
         }
         "aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),