38 files changed, 1800 insertions, 1071 deletions
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index 9aa01bd1b95..3722d4350a2 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -442,6 +442,14 @@ impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
             );
         }
     }
+
+    /// Returns the mangled name of the LLVM function obtained via `get_fn(instance)`.
+    fn mangled_name(&self, instance: Instance<'tcx>) -> String {
+        let llfn = self.get_fn(instance);
+        // The closure forwards the string buffer to LLVM; a non-UTF-8 result is treated as a bug.
+        let name = llvm::build_string(|buf| unsafe { llvm::LLVMRustGetMangledName(llfn, buf); });
+        name.expect("symbol is not valid UTF-8")
+    }
 }
 
 pub(crate) fn inline_asm_call<'ll>(
@@ -504,14 +512,13 @@ pub(crate) fn inline_asm_call<'ll>(
             let key = "srcloc";
             let kind = llvm::LLVMGetMDKindIDInContext(
                 bx.llcx,
-                key.as_ptr() as *const c_char,
+                key.as_ptr().cast::<c_char>(),
                 key.len() as c_uint,
             );
 
-            // srcloc contains one integer for each line of assembly code.
-            // Unfortunately this isn't enough to encode a full span so instead
-            // we just encode the start position of each line.
-            // FIXME: Figure out a way to pass the entire line spans.
+            // `srcloc` contains one 64-bit integer for each line of assembly code,
+            // where the lower 32 bits hold the lo byte position and the upper 32 bits
+            // hold the hi byte position.
             let mut srcloc = vec![];
             if dia == llvm::AsmDialect::Intel && line_spans.len() > 1 {
                 // LLVM inserts an extra line to add the ".intel_syntax", so add
@@ -521,13 +528,13 @@ pub(crate) fn inline_asm_call<'ll>(
                 // due to the asm template string coming from a macro. LLVM will
                 // default to the first srcloc for lines that don't have an
                 // associated srcloc.
-                srcloc.push(llvm::LLVMValueAsMetadata(bx.const_i32(0)));
+                srcloc.push(llvm::LLVMValueAsMetadata(bx.const_u64(0)));
             }
-            srcloc.extend(
-                line_spans
-                    .iter()
-                    .map(|span| llvm::LLVMValueAsMetadata(bx.const_i32(span.lo().to_u32() as i32))),
-            );
+            srcloc.extend(line_spans.iter().map(|span| {
+                llvm::LLVMValueAsMetadata(bx.const_u64(
+                    u64::from(span.lo().to_u32()) | (u64::from(span.hi().to_u32()) << 32),
+                ))
+            }));
             let md = llvm::LLVMMDNodeInContext2(bx.llcx, srcloc.as_ptr(), srcloc.len());
             let md = llvm::LLVMMetadataAsValue(&bx.llcx, md);
             llvm::LLVMSetMetadata(call, kind, md);
@@ -656,9 +663,8 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass, layout: Option<&TyAndLayout<'_>>) ->
             PowerPC(PowerPCInlineAsmRegClass::reg) => "r",
             PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => "b",
             PowerPC(PowerPCInlineAsmRegClass::freg) => "f",
-            PowerPC(PowerPCInlineAsmRegClass::cr)
-            | PowerPC(PowerPCInlineAsmRegClass::xer)
-            | PowerPC(PowerPCInlineAsmRegClass::vreg) => {
+            PowerPC(PowerPCInlineAsmRegClass::vreg) => "v",
+            PowerPC(PowerPCInlineAsmRegClass::cr) | PowerPC(PowerPCInlineAsmRegClass::xer) => {
                 unreachable!("clobber-only")
             }
             RiscV(RiscVInlineAsmRegClass::reg) => "r",
@@ -825,9 +831,8 @@ fn dummy_output_type<'ll>(cx: &CodegenCx<'ll, '_>, reg: InlineAsmRegClass) -> &'
         PowerPC(PowerPCInlineAsmRegClass::reg) => cx.type_i32(),
         PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => cx.type_i32(),
         PowerPC(PowerPCInlineAsmRegClass::freg) => cx.type_f64(),
-        PowerPC(PowerPCInlineAsmRegClass::cr)
-        | PowerPC(PowerPCInlineAsmRegClass::xer)
-        | PowerPC(PowerPCInlineAsmRegClass::vreg) => {
+        PowerPC(PowerPCInlineAsmRegClass::vreg) => cx.type_vector(cx.type_i32(), 4),
+        PowerPC(PowerPCInlineAsmRegClass::cr) | PowerPC(PowerPCInlineAsmRegClass::xer) => {
             unreachable!("clobber-only")
         }
         RiscV(RiscVInlineAsmRegClass::reg) => cx.type_i32(),
@@ -1042,6 +1047,26 @@ fn llvm_fixup_input<'ll, 'tcx>(
             let value = bx.or(value, bx.const_u32(0xFFFF_0000));
             bx.bitcast(value, bx.type_f32())
         }
+        (PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
+            if s.primitive() == Primitive::Float(Float::F32) =>
+        {
+            let value = bx.insert_element(
+                bx.const_undef(bx.type_vector(bx.type_f32(), 4)),
+                value,
+                bx.const_usize(0),
+            );
+            bx.bitcast(value, bx.type_vector(bx.type_f32(), 4))
+        }
+        (PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
+            if s.primitive() == Primitive::Float(Float::F64) =>
+        {
+            let value = bx.insert_element(
+                bx.const_undef(bx.type_vector(bx.type_f64(), 2)),
+                value,
+                bx.const_usize(0),
+            );
+            bx.bitcast(value, bx.type_vector(bx.type_f64(), 2))
+        }
         _ => value,
     }
 }
@@ -1177,6 +1202,18 @@ fn llvm_fixup_output<'ll, 'tcx>(
             let value = bx.trunc(value, bx.type_i16());
             bx.bitcast(value, bx.type_f16())
         }
+        (PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
+            if s.primitive() == Primitive::Float(Float::F32) =>
+        {
+            let value = bx.bitcast(value, bx.type_vector(bx.type_f32(), 4));
+            bx.extract_element(value, bx.const_usize(0))
+        }
+        (PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
+            if s.primitive() == Primitive::Float(Float::F64) =>
+        {
+            let value = bx.bitcast(value, bx.type_vector(bx.type_f64(), 2));
+            bx.extract_element(value, bx.const_usize(0))
+        }
         _ => value,
     }
 }
@@ -1301,6 +1338,16 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
         {
             cx.type_f32()
         }
+        (PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
+            if s.primitive() == Primitive::Float(Float::F32) =>
+        {
+            cx.type_vector(cx.type_f32(), 4)
+        }
+        (PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
+            if s.primitive() == Primitive::Float(Float::F64) =>
+        {
+            cx.type_vector(cx.type_f64(), 2)
+        }
         _ => layout.llvm_type(cx),
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index cb958c1d4d7..95e0481b035 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -1,6 +1,6 @@
 //! Set and unset common attributes on LLVM values.
 
-use rustc_attr::{InlineAttr, InstructionSetAttr, OptimizeAttr};
+use rustc_attr_parsing::{InlineAttr, InstructionSetAttr, OptimizeAttr};
 use rustc_codegen_ssa::traits::*;
 use rustc_hir::def_id::DefId;
 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, PatchableFunctionEntry};
@@ -37,7 +37,9 @@ fn inline_attr<'ll>(cx: &CodegenCx<'ll, '_>, inline: InlineAttr) -> Option<&'ll
     }
     match inline {
         InlineAttr::Hint => Some(AttributeKind::InlineHint.create_attr(cx.llcx)),
-        InlineAttr::Always => Some(AttributeKind::AlwaysInline.create_attr(cx.llcx)),
+        InlineAttr::Always | InlineAttr::Force { .. } => {
+            Some(AttributeKind::AlwaysInline.create_attr(cx.llcx))
+        }
         InlineAttr::Never => {
             if cx.sess().target.arch != "amdgpu" {
                 Some(AttributeKind::NoInline.create_attr(cx.llcx))
@@ -395,17 +397,9 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         to_add.push(MemoryEffects::None.create_attr(cx.llcx));
     }
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
-        to_add.push(AttributeKind::Naked.create_attr(cx.llcx));
-        // HACK(jubilee): "indirect branch tracking" works by attaching prologues to functions.
-        // And it is a module-level attribute, so the alternative is pulling naked functions into
-        // new LLVM modules. Otherwise LLVM's "naked" functions come with endbr prefixes per
-        // https://github.com/rust-lang/rust/issues/98768
-        to_add.push(AttributeKind::NoCfCheck.create_attr(cx.llcx));
-        if llvm_util::get_version() < (19, 0, 0) {
-            // Prior to LLVM 19, branch-target-enforcement was disabled by setting the attribute to
-            // the string "false". Now it is disabled by absence of the attribute.
-            to_add.push(llvm::CreateAttrStringValue(cx.llcx, "branch-target-enforcement", "false"));
-        }
+        // do nothing; a naked function is converted into an extern function
+        // and a global assembly block. LLVM's support for naked functions is
+        // not used.
     } else {
         // Do not set sanitizer attributes for naked functions.
         to_add.extend(sanitize_attrs(cx, codegen_fn_attrs.no_sanitize));
@@ -480,7 +474,11 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         let allocated_pointer = AttributeKind::AllocatedPointer.create_attr(cx.llcx);
         attributes::apply_to_llfn(llfn, AttributePlace::Argument(0), &[allocated_pointer]);
     }
-    if let Some(align) = codegen_fn_attrs.alignment {
+    // function alignment can be set globally with the `-Zmin-function-alignment=<n>` flag;
+    // the alignment from a `#[repr(align(<n>))]` is used if it specifies a higher alignment.
+    if let Some(align) =
+        Ord::max(cx.tcx.sess.opts.unstable_opts.min_function_alignment, codegen_fn_attrs.alignment)
+    {
         llvm::set_alignment(llfn, align);
     }
     if let Some(backchain) = backchain_attr(cx) {
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index 48beb9be2b2..78c759bbe8c 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -1,7 +1,6 @@
 use std::collections::BTreeMap;
 use std::ffi::{CStr, CString};
 use std::fs::File;
-use std::mem::ManuallyDrop;
 use std::path::Path;
 use std::sync::Arc;
 use std::{io, iter, slice};
@@ -9,7 +8,7 @@ use std::{io, iter, slice};
 use object::read::archive::ArchiveFile;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
 use rustc_codegen_ssa::back::symbol_export;
-use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, TargetMachineFactoryConfig};
+use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
 use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
 use rustc_data_structures::fx::FxHashMap;
@@ -148,7 +147,7 @@ fn prepare_lto(
     // __llvm_profile_counter_bias is pulled in at link time by an undefined reference to
     // __llvm_profile_runtime, therefore we won't know until link time if this symbol
     // should have default visibility.
-    symbols_below_threshold.push(CString::new("__llvm_profile_counter_bias").unwrap());
+    symbols_below_threshold.push(c"__llvm_profile_counter_bias".to_owned());
     Ok((symbols_below_threshold, upstream_modules))
 }
 
@@ -604,7 +603,14 @@ pub(crate) fn run_pass_manager(
     debug!("running the pass manager");
     let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
     let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
-    unsafe { write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) }?;
+
+    // If this rustc version was built with enzyme/autodiff enabled, and if users applied the
+    // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time.
+    let first_run = true;
+    debug!("running llvm pm opt pipeline");
+    unsafe {
+        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?;
+    }
     debug!("lto done");
     Ok(())
 }
@@ -699,18 +705,15 @@ pub(crate) unsafe fn optimize_thin_module(
     let dcx = dcx.handle();
 
     let module_name = &thin_module.shared.module_names[thin_module.idx];
-    let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap());
-    let tm = (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(dcx, e))?;
 
     // Right now the implementation we've got only works over serialized
     // modules, so we create a fresh new LLVM context and parse the module
     // into that context. One day, however, we may do this for upstream
     // crates but for locally codegened modules we may be able to reuse
     // that LLVM Context and Module.
-    let llcx = unsafe { llvm::LLVMRustContextCreate(cgcx.fewer_names) };
-    let llmod_raw = parse_module(llcx, module_name, thin_module.data(), dcx)? as *const _;
+    let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx)?;
     let mut module = ModuleCodegen {
-        module_llvm: ModuleLlvm { llmod_raw, llcx, tm: ManuallyDrop::new(tm) },
+        module_llvm,
         name: thin_module.name().to_string(),
         kind: ModuleKind::Regular,
     };
@@ -730,11 +733,7 @@ pub(crate) unsafe fn optimize_thin_module(
         {
             let _timer =
                 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
-            if unsafe {
-                !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target)
-            } {
-                return Err(write::llvm_err(dcx, LlvmError::PrepareThinLtoModule));
-            }
+            unsafe { llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) };
             save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
         }
 
diff --git a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
index 44c30d22a9e..4cbd49aa44d 100644
--- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
+++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
@@ -25,7 +25,7 @@ impl OwnedTargetMachine {
         model: llvm::CodeModel,
         reloc: llvm::RelocModel,
         level: llvm::CodeGenOptLevel,
-        use_soft_fp: bool,
+        float_abi: llvm::FloatAbi,
         function_sections: bool,
         data_sections: bool,
         unique_section_names: bool,
@@ -57,7 +57,7 @@ impl OwnedTargetMachine {
                 model,
                 reloc,
                 level,
-                use_soft_fp,
+                float_abi,
                 function_sections,
                 data_sections,
                 unique_section_names,
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 00f7b479fa7..509b24dd703 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -25,10 +25,9 @@ use rustc_session::Session;
 use rustc_session::config::{
     self, Lto, OutputType, Passes, RemapPathScopeComponents, SplitDwarfKind, SwitchWithOptPath,
 };
-use rustc_span::InnerSpan;
-use rustc_span::symbol::sym;
-use rustc_target::spec::{CodeModel, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
-use tracing::debug;
+use rustc_span::{BytePos, InnerSpan, Pos, SpanData, SyntaxContext, sym};
+use rustc_target::spec::{CodeModel, FloatAbi, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
+use tracing::{debug, trace};
 
 use crate::back::lto::ThinBuffer;
 use crate::back::owned_target_machine::OwnedTargetMachine;
@@ -61,6 +60,7 @@ fn write_output_file<'ll>(
     dwo_output: Option<&Path>,
     file_type: llvm::FileType,
     self_profiler_ref: &SelfProfilerRef,
+    verify_llvm_ir: bool,
 ) -> Result<(), FatalError> {
     debug!("write_output_file output={:?} dwo_output={:?}", output, dwo_output);
     unsafe {
@@ -79,6 +79,7 @@ fn write_output_file<'ll>(
             output_c.as_ptr(),
             dwo_output_ptr,
             file_type,
+            verify_llvm_ir,
         );
 
         // Record artifact sizes for self-profiling
@@ -180,6 +181,14 @@ pub(crate) fn to_llvm_code_model(code_model: Option<CodeModel>) -> llvm::CodeMod
     }
 }
 
+/// Maps an optional `FloatAbi` onto LLVM's enum; `None` becomes `llvm::FloatAbi::Default`.
+fn to_llvm_float_abi(float_abi: Option<FloatAbi>) -> llvm::FloatAbi {
+    float_abi.map_or(llvm::FloatAbi::Default, |abi| match abi {
+        FloatAbi::Soft => llvm::FloatAbi::Soft,
+        FloatAbi::Hard => llvm::FloatAbi::Hard,
+    })
+}
+
 pub(crate) fn target_machine_factory(
     sess: &Session,
     optlvl: config::OptLevel,
@@ -188,12 +197,12 @@ pub(crate) fn target_machine_factory(
     let reloc_model = to_llvm_relocation_model(sess.relocation_model());
 
     let (opt_level, _) = to_llvm_opt_settings(optlvl);
-    let use_softfp = if sess.target.arch == "arm" && sess.target.abi == "eabihf" {
-        sess.opts.cg.soft_float
+    let float_abi = if sess.target.arch == "arm" && sess.opts.cg.soft_float {
+        llvm::FloatAbi::Soft
     } else {
         // `validate_commandline_args_with_session_available` has already warned about this being
         // ignored. Let's make sure LLVM doesn't suddenly start using this flag on more targets.
-        false
+        to_llvm_float_abi(sess.target.llvm_floatabi)
     };
 
     let ffunction_sections =
@@ -289,7 +298,7 @@ pub(crate) fn target_machine_factory(
             code_model,
             reloc_model,
             opt_level,
-            use_softfp,
+            float_abi,
             ffunction_sections,
             fdata_sections,
             funique_section_names,
@@ -413,21 +422,32 @@ fn report_inline_asm(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     msg: String,
     level: llvm::DiagnosticLevel,
-    mut cookie: u64,
+    cookie: u64,
     source: Option<(String, Vec<InnerSpan>)>,
 ) {
     // In LTO build we may get srcloc values from other crates which are invalid
     // since they use a different source map. To be safe we just suppress these
     // in LTO builds.
-    if matches!(cgcx.lto, Lto::Fat | Lto::Thin) {
-        cookie = 0;
-    }
+    let span = if cookie == 0 || matches!(cgcx.lto, Lto::Fat | Lto::Thin) {
+        SpanData::default()
+    } else {
+        let lo = BytePos::from_u32(cookie as u32);
+        let hi = BytePos::from_u32((cookie >> 32) as u32);
+        SpanData {
+            lo,
+            // LLVM version < 19 silently truncates the cookie to 32 bits in some situations.
+            hi: if hi.to_u32() != 0 { hi } else { lo },
+            ctxt: SyntaxContext::root(),
+            parent: None,
+        }
+    };
     let level = match level {
         llvm::DiagnosticLevel::Error => Level::Error,
         llvm::DiagnosticLevel::Warning => Level::Warning,
         llvm::DiagnosticLevel::Note | llvm::DiagnosticLevel::Remark => Level::Note,
     };
-    cgcx.diag_emitter.inline_asm_error(cookie.try_into().unwrap(), msg, level, source);
+    let msg = msg.strip_prefix("error: ").unwrap_or(&msg).to_string();
+    cgcx.diag_emitter.inline_asm_error(span, msg, level, source);
 }
 
 unsafe extern "C" fn diagnostic_handler(info: &DiagnosticInfo, user: *mut c_void) {
@@ -507,7 +527,7 @@ fn get_pgo_sample_use_path(config: &ModuleConfig) -> Option<CString> {
 }
 
 fn get_instr_profile_output_path(config: &ModuleConfig) -> Option<CString> {
-    config.instrument_coverage.then(|| CString::new("default_%m_%p.profraw").unwrap())
+    config.instrument_coverage.then(|| c"default_%m_%p.profraw".to_owned())
 }
 
 pub(crate) unsafe fn llvm_optimize(
@@ -517,9 +537,35 @@ pub(crate) unsafe fn llvm_optimize(
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
+    skip_size_increasing_opts: bool,
 ) -> Result<(), FatalError> {
-    let unroll_loops =
-        opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+    // Enzyme:
+    // The whole point of compiler-based AD is to differentiate optimized IR instead of unoptimized
+    // source code. However, benchmarks show that optimizations increasing the code size
+    // tend to reduce AD performance. Therefore deactivate them before AD, then differentiate the code
+    // and finally re-optimize the module, now with all optimizations available.
+    // FIXME(ZuseZ4): In a future update we could figure out how to only optimize individual functions getting
+    // differentiated.
+
+    let unroll_loops;
+    let vectorize_slp;
+    let vectorize_loop;
+
+    // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
+    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
+    // we should make this more granular, or at least check that the user has at least one autodiff
+    // call in their code, to justify altering the compilation pipeline.
+    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+        unroll_loops = false;
+        vectorize_slp = false;
+        vectorize_loop = false;
+    } else {
+        unroll_loops =
+            opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+        vectorize_slp = config.vectorize_slp;
+        vectorize_loop = config.vectorize_loop;
+    }
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
@@ -583,8 +629,8 @@ pub(crate) unsafe fn llvm_optimize(
             using_thin_buffers,
             config.merge_functions,
             unroll_loops,
-            config.vectorize_slp,
-            config.vectorize_loop,
+            vectorize_slp,
+            vectorize_loop,
             config.no_builtins,
             config.emit_lifetime_markers,
             sanitizer_options.as_ref(),
@@ -628,6 +674,8 @@ pub(crate) unsafe fn optimize(
         unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
     }
 
+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
     if let Some(opt_level) = config.opt_level {
         let opt_stage = match cgcx.lto {
             Lto::Fat => llvm::OptStage::PreLinkFatLTO,
@@ -635,7 +683,20 @@ pub(crate) unsafe fn optimize(
             _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
             _ => llvm::OptStage::PreLinkNoLTO,
         };
-        return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
+
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling
+        let skip_size_increasing_opts = cfg!(llvm_enzyme);
+        return unsafe {
+            llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )
+        };
     }
     Ok(())
 }
@@ -840,6 +901,7 @@ pub(crate) unsafe fn codegen(
                         None,
                         llvm::FileType::AssemblyFile,
                         &cgcx.prof,
+                        config.verify_llvm_ir,
                     )
                 })?;
             }
@@ -877,6 +939,7 @@ pub(crate) unsafe fn codegen(
                             dwo_out,
                             llvm::FileType::ObjectFile,
                             &cgcx.prof,
+                            config.verify_llvm_ir,
                         )
                     })?;
                 }
diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs
index f62310bd948..d05faf5577b 100644
--- a/compiler/rustc_codegen_llvm/src/base.rs
+++ b/compiler/rustc_codegen_llvm/src/base.rs
@@ -23,7 +23,7 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::mir::mono::{Linkage, Visibility};
 use rustc_middle::ty::TyCtxt;
 use rustc_session::config::DebugInfo;
-use rustc_span::symbol::Symbol;
+use rustc_span::Symbol;
 use rustc_target::spec::SanitizerSet;
 
 use super::ModuleLlvm;
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index b5bb7630ca6..5a34b52e6ef 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -2,6 +2,8 @@ use std::borrow::Cow;
 use std::ops::Deref;
 use std::{iter, ptr};
 
+pub(crate) mod autodiff;
+
 use libc::{c_char, c_uint};
 use rustc_abi as abi;
 use rustc_abi::{Align, Size, WrappingRange};
@@ -608,14 +610,6 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn range_metadata(&mut self, load: &'ll Value, range: WrappingRange) {
-        if self.sess().target.arch == "amdgpu" {
-            // amdgpu/LLVM does something weird and thinks an i64 value is
-            // split into a v2i32, halving the bitwidth LLVM expects,
-            // tripping an assertion. So, for now, just disable this
-            // optimization.
-            return;
-        }
-
         if self.cx.sess().opts.optimize == OptLevel::No {
             // Don't emit metadata we're not going to use
             return;
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
new file mode 100644
index 00000000000..38f7eaa090f
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -0,0 +1,344 @@
+use std::ptr;
+
+use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode};
+use rustc_codegen_ssa::ModuleCodegen;
+use rustc_codegen_ssa::back::write::ModuleConfig;
+use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods};
+use rustc_errors::FatalError;
+use rustc_middle::ty::TyCtxt;
+use rustc_session::config::Lto;
+use tracing::{debug, trace};
+
+use crate::back::write::{llvm_err, llvm_optimize};
+use crate::builder::Builder;
+use crate::declare::declare_raw_fn;
+use crate::errors::LlvmError;
+use crate::llvm::AttributePlace::Function;
+use crate::llvm::{Metadata, True};
+use crate::value::Value;
+use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, context, llvm};
+
+/// Collects the LLVM parameters of the function `fnc` into a vector.
+fn get_params(fnc: &Value) -> Vec<&Value> {
+    unsafe {
+        let param_num = llvm::LLVMCountParams(fnc) as usize;
+        let mut fnc_args: Vec<&Value> = Vec::with_capacity(param_num);
+        llvm::LLVMGetParams(fnc, fnc_args.as_mut_ptr());
+        fnc_args.set_len(param_num);
+        fnc_args
+    }
+}
+
+/// When differentiating `fn_to_diff`, take an `outer_fn` and generate another
+/// function with expected naming and calling conventions[^1] which will be
+/// discovered by the enzyme LLVM pass and its body populated with the differentiated
+/// `fn_to_diff`. `outer_fn` is then modified to have a call to the generated
+/// function and handle the differences between the Rust calling convention and
+/// Enzyme.
+/// [^1]: <https://enzyme.mit.edu/getting_started/CallingConvention/>
+// FIXME(ZuseZ4): `outer_fn` should include upstream safety checks to
+// cover some assumptions of enzyme/autodiff, which could lead to UB otherwise.
+fn generate_enzyme_call<'ll, 'tcx>(
+    cx: &context::CodegenCx<'ll, 'tcx>,
+    fn_to_diff: &'ll Value,
+    outer_fn: &'ll Value,
+    attrs: AutoDiffAttrs,
+) {
+    let inputs = attrs.input_activity;
+    let output = attrs.ret_activity;
+
+    // We have to pick the name depending on whether we want forward or reverse mode autodiff.
+    // FIXME(ZuseZ4): The new pass based approach should not need the {Forward/Reverse}First method anymore, since
+    // it will handle higher-order derivatives correctly automatically (in theory). Currently
+    // higher-order derivatives fail, so we should debug that before adjusting this code.
+    let mut ad_name: String = match attrs.mode {
+        DiffMode::Forward => "__enzyme_fwddiff",
+        DiffMode::Reverse => "__enzyme_autodiff",
+        DiffMode::ForwardFirst => "__enzyme_fwddiff",
+        DiffMode::ReverseFirst => "__enzyme_autodiff",
+        _ => panic!("logic bug in autodiff, unrecognized mode"),
+    }
+    .to_string();
+
+    // add outer_fn name to ad_name to make it unique, in case users apply autodiff to multiple
+    // functions. Unwrap will only panic, if LLVM gave us an invalid string.
+    let name = llvm::get_value_name(outer_fn);
+    let outer_fn_name = std::ffi::CStr::from_bytes_with_nul(name).unwrap().to_str().unwrap();
+    ad_name.push_str(outer_fn_name);
+
+    // Let us assume the user wrote the following function square:
+    //
+    // ```llvm
+    // define double @square(double %x) {
+    // entry:
+    //  %0 = fmul double %x, %x
+    //  ret double %0
+    // }
+    // ```
+    //
+    // The user now applies autodiff to the function square, in which case fn_to_diff will be `square`.
+    // Our macro generates the following placeholder code (slightly simplified):
+    //
+    // ```llvm
+    // define double @dsquare(double %x) {
+    //  ; placeholder code
+    //  return 0.0;
+    // }
+    // ```
+    //
+    // so our `outer_fn` will be `dsquare`. The unsafe code section below now removes the placeholder
+    // code and inserts an autodiff call. We also add a declaration for the __enzyme_autodiff call.
+    // Again, the arguments to all functions are slightly simplified.
+    // ```llvm
+    // declare double @__enzyme_autodiff_square(...)
+    //
+    // define double @dsquare(double %x) {
+    // entry:
+    //   %0 = tail call double (...) @__enzyme_autodiff_square(double (double)* nonnull @square, double %x)
+    //   ret double %0
+    // }
+    // ```
+    unsafe {
+        // On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input
+        // arguments. We do however need to declare them with their correct return type.
+        // We already figured the correct return type out in our frontend, when generating the outer_fn,
+        // so we can now just go ahead and use that. FIXME(ZuseZ4): This doesn't handle sret yet.
+        let fn_ty = llvm::LLVMGlobalGetValueType(outer_fn);
+        let ret_ty = llvm::LLVMGetReturnType(fn_ty);
+
+        // LLVM can figure out the input types on its own, so we take a shortcut here.
+        let enzyme_ty = llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True);
+
+        //FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and
+        // think a bit more about what should go here.
+        let cc = llvm::LLVMGetFunctionCallConv(outer_fn);
+        let ad_fn = declare_raw_fn(
+            cx,
+            &ad_name,
+            llvm::CallConv::try_from(cc).expect("invalid callconv"),
+            llvm::UnnamedAddr::No,
+            llvm::Visibility::Default,
+            enzyme_ty,
+        );
+
+        // Otherwise LLVM might inline our temporary code before the enzyme pass has a chance to
+        // do its work.
+        let attr = llvm::AttributeKind::NoInline.create_attr(cx.llcx);
+        attributes::apply_to_llfn(ad_fn, Function, &[attr]);
+
+        // First, drop the terminator of the placeholder entry block of `outer_fn`.
+        let entry = llvm::LLVMGetFirstBasicBlock(outer_fn);
+        let br = llvm::LLVMRustGetTerminator(entry);
+        llvm::LLVMRustEraseInstFromParent(br);
+
+        let last_inst = llvm::LLVMRustGetLastInstruction(entry).unwrap();
+        let mut builder = Builder::build(cx, entry);
+
+        let num_args = llvm::LLVMCountParams(fn_to_diff);
+        let mut args = Vec::with_capacity(num_args as usize + 1);
+        args.push(fn_to_diff);
+
+        let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
+        let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
+        let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
+        let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
+        let enzyme_primal_ret = cx.create_metadata("enzyme_primal_return".to_string()).unwrap();
+
+        // For `Dual` and `Active` return activities we pass the `enzyme_primal_return` marker.
+        match output {
+            DiffActivity::Dual | DiffActivity::Active => {
+                args.push(cx.get_metadata_value(enzyme_primal_ret));
+            }
+            _ => {}
+        }
+
+        trace!("matching autodiff arguments");
+        // We now handle the issue that Rust level arguments not always match the llvm-ir level
+        // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
+        // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
+        // need to match those.
+        // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
+        // using iterators and peek()?
+        let mut outer_pos: usize = 0;
+        let mut activity_pos = 0;
+        let outer_args: Vec<&llvm::Value> = get_params(outer_fn);
+        while activity_pos < inputs.len() {
+            let activity = inputs[activity_pos];
+            // Duplicated arguments received a shadow argument, into which enzyme will write the
+            // gradient.
+            let (activity, duplicated): (&Metadata, bool) = match activity {
+                DiffActivity::None => panic!("not a valid input activity"),
+                DiffActivity::Const => (enzyme_const, false),
+                DiffActivity::Active => (enzyme_out, false),
+                DiffActivity::ActiveOnly => (enzyme_out, false),
+                DiffActivity::Dual => (enzyme_dup, true),
+                DiffActivity::DualOnly => (enzyme_dupnoneed, true),
+                DiffActivity::Duplicated => (enzyme_dup, true),
+                DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
+                DiffActivity::FakeActivitySize => (enzyme_const, false),
+            };
+            let outer_arg = outer_args[outer_pos];
+            args.push(cx.get_metadata_value(activity));
+            args.push(outer_arg);
+            if duplicated {
+                // We know that duplicated args by construction have a following argument,
+                // so this can not be out of bounds.
+                let next_outer_arg = outer_args[outer_pos + 1];
+                let next_outer_ty = cx.val_ty(next_outer_arg);
+                // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since
+                // vectors behind references (&Vec<T>) are already supported. Users can not pass a
+                // Vec by value for reverse mode, so this would only help forward mode autodiff.
+                let slice = {
+                    if activity_pos + 1 >= inputs.len() {
+                        // If there is no arg following our ptr, it also can't be a slice,
+                        // since that would lead to a ptr, int pair.
+                        false
+                    } else {
+                        let next_activity = inputs[activity_pos + 1];
+                        // We analyze the MIR types and add this dummy activity if we visit a slice.
+                        next_activity == DiffActivity::FakeActivitySize
+                    }
+                };
+                if slice {
+                    // A duplicated slice will have the following two outer_fn arguments:
+                    // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call:
+                    // (..., metadata! enzyme_dup, ptr1, ptr2, metadata! enzyme_const, int1, ...).
+                    // FIXME(ZuseZ4): We will upstream a safety check later which asserts that
+                    // int2 >= int1, which means the shadow vector is large enough to store the gradient.
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer);
+                    let next_outer_arg2 = outer_args[outer_pos + 2];
+                    let next_outer_ty2 = cx.val_ty(next_outer_arg2);
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer);
+                    let next_outer_arg3 = outer_args[outer_pos + 3];
+                    let next_outer_ty3 = cx.val_ty(next_outer_arg3);
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer);
+                    args.push(next_outer_arg2);
+                    args.push(cx.get_metadata_value(enzyme_const));
+                    args.push(next_outer_arg);
+                    outer_pos += 4;
+                    activity_pos += 2;
+                } else {
+                    // A duplicated pointer will have the following two outer_fn arguments:
+                    // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
+                    // (..., metadata! enzyme_dup, ptr, ptr, ...).
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer);
+                    args.push(next_outer_arg);
+                    outer_pos += 2;
+                    activity_pos += 1;
+                }
+            } else {
+                // We do not differentiate with respect to this argument.
+                // We already added the metadata and argument above, so just increase the counters.
+                outer_pos += 1;
+                activity_pos += 1;
+            }
+        }
+
+        let call = builder.call(enzyme_ty, None, None, ad_fn, &args, None, None);
+
+        // This part is a bit iffy. LLVM requires that a call to an inlineable function has some
+        // metadata attached to it, but we just created this code out of thin air. Given that the
+        // differentiated function already has partly confusing metadata, and given that this
+        // affects nothing but the autodiff IR, we take a shortcut and just steal metadata from the
+        // dummy code which we inserted at a higher level.
+        // FIXME(ZuseZ4): Work with Enzyme core devs to clarify what debug metadata issues we have,
+        // and how to best improve it for enzyme core and rust-enzyme.
+        let md_ty = cx.get_md_kind_id("dbg");
+        if llvm::LLVMRustHasMetadata(last_inst, md_ty) {
+            let md = llvm::LLVMRustDIGetInstMetadata(last_inst)
+                .expect("failed to get instruction metadata");
+            let md_todiff = cx.get_metadata_value(md);
+            llvm::LLVMSetMetadata(call, md_ty, md_todiff);
+        } else {
+            // We don't panic, since depending on whether we are in debug or release mode, we might
+            // have no debug info to copy, which would then be ok.
+            trace!("no dbg info");
+        }
+        // Now that we copied the metadata, get rid of dummy code.
+        llvm::LLVMRustEraseInstBefore(entry, last_inst);
+        llvm::LLVMRustEraseInstFromParent(last_inst);
+
+        // `outer_fn` is a function value whose own LLVM type is a pointer, never void, so
+        // look at the type of the call result to decide between `ret` and `ret void`.
+        if cx.val_ty(call) != cx.type_void() {
+            builder.ret(call);
+        } else {
+            builder.ret_void();
+        }
+
+        // Let's crash in case that we messed something up above and generated invalid IR.
+        llvm::LLVMRustVerifyFunction(
+            outer_fn,
+            llvm::LLVMRustVerifierFailureAction::LLVMAbortProcessAction,
+        );
+    }
+}
+
+/// Rewrites each autodiff target in `module` via `generate_enzyme_call`, then runs
+/// `llvm_optimize` on the module again if an optimization level is configured.
+///
+/// Fails with `LlvmError::PrepareAutoDiff` when a source or target function is missing.
+pub(crate) fn differentiate<'ll, 'tcx>(
+    module: &'ll ModuleCodegen<ModuleLlvm>,
+    cgcx: &CodegenContext<LlvmCodegenBackend>,
+    tcx: TyCtxt<'tcx>,
+    diff_items: Vec<AutoDiffItem>,
+    config: &ModuleConfig,
+) -> Result<(), FatalError> {
+    for item in &diff_items {
+        trace!("{}", item);
+    }
+
+    let diag_handler = cgcx.create_dcx();
+    let (_, cgus) = tcx.collect_and_partition_mono_items(());
+    let cx = context::CodegenCx::new(tcx, &cgus.first().unwrap(), &module.module_llvm);
+
+    // Before dumping the module, we want all the TypeTrees to become part of the module.
+    for item in diff_items.iter() {
+        let Some(fn_def) = cx.get_function(&item.source) else {
+            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
+                src: item.source.clone(),
+                target: item.target.clone(),
+                error: "could not find source function".to_owned(),
+            }));
+        };
+        debug!(?item.target);
+        let Some(fn_target) = cx.get_function(&item.target) else {
+            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
+                src: item.source.clone(),
+                target: item.target.clone(),
+                error: "could not find target function".to_owned(),
+            }));
+        };
+        generate_enzyme_call(&cx, fn_def, fn_target, item.attrs.clone());
+    }
+
+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
+    if let Some(opt_level) = config.opt_level {
+        let opt_stage = match cgcx.lto {
+            Lto::Fat => llvm::OptStage::PreLinkFatLTO,
+            Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
+            _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
+            _ => llvm::OptStage::PreLinkNoLTO,
+        };
+        // This is our second opt call, so now we run all opts,
+        // to make sure we get the best performance.
+        let skip_size_increasing_opts = false;
+        trace!("running Module Optimization after differentiation");
+        unsafe {
+            llvm_optimize(
+                cgcx,
+                diag_handler.handle(),
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )?
+        };
+    }
+    trace!("done with differentiate()");
+
+    Ok(())
+}
diff --git a/compiler/rustc_codegen_llvm/src/callee.rs b/compiler/rustc_codegen_llvm/src/callee.rs
index e0a2de3366c..aa9a0f34f55 100644
--- a/compiler/rustc_codegen_llvm/src/callee.rs
+++ b/compiler/rustc_codegen_llvm/src/callee.rs
@@ -104,7 +104,10 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t
 
             let is_hidden = if is_generic {
                 // This is a monomorphization of a generic function.
-                if !cx.tcx.sess.opts.share_generics() {
+                if !(cx.tcx.sess.opts.share_generics()
+                    || tcx.codegen_fn_attrs(instance_def_id).inline
+                        == rustc_attr_parsing::InlineAttr::Never)
+                {
                     // When not sharing generics, all instances are in the same
                     // crate and have hidden visibility.
                     true
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index 8852dec7d9f..adfe8aeb5c5 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -302,10 +302,9 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                             (value, AddressSpace::DATA)
                         }
                     }
-                    GlobalAlloc::Function { instance, .. } => (
-                        self.get_fn_addr(instance.polymorphize(self.tcx)),
-                        self.data_layout().instruction_address_space,
-                    ),
+                    GlobalAlloc::Function { instance, .. } => {
+                        (self.get_fn_addr(instance), self.data_layout().instruction_address_space)
+                    }
                     GlobalAlloc::VTable(ty, dyn_ty) => {
                         let alloc = self
                             .tcx
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 841c110b3c8..d8fbe51b975 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -1,6 +1,6 @@
 use std::borrow::Borrow;
 use std::cell::{Cell, RefCell};
-use std::ffi::{CStr, c_uint};
+use std::ffi::{CStr, c_char, c_uint};
 use std::str;
 
 use rustc_abi::{HasDataLayout, TargetDataLayout, VariantIdx};
@@ -82,8 +82,8 @@ pub(crate) struct CodegenCx<'ll, 'tcx> {
 
     pub isize_ty: &'ll Type,
 
-    /// Extra codegen state needed when coverage instrumentation is enabled.
-    pub coverage_cx: Option<coverageinfo::CrateCoverageContext<'ll, 'tcx>>,
+    /// Extra per-CGU codegen state needed when coverage instrumentation is enabled.
+    pub coverage_cx: Option<coverageinfo::CguCoverageContext<'ll, 'tcx>>,
     pub dbg_cx: Option<debuginfo::CodegenUnitDebugContext<'ll, 'tcx>>,
 
     eh_personality: Cell<Option<&'ll Value>>,
@@ -159,6 +159,16 @@ pub(crate) unsafe fn create_module<'ll>(
             // See https://github.com/llvm/llvm-project/pull/112084
             target_data_layout = target_data_layout.replace("-i128:128", "");
         }
+        if sess.target.arch.starts_with("powerpc64") {
+            // LLVM 20 updates the powerpc64 layout to correctly align 128 bit integers to 128 bit.
+            // See https://github.com/llvm/llvm-project/pull/118004
+            target_data_layout = target_data_layout.replace("-i128:128", "");
+        }
+        if sess.target.arch.starts_with("wasm32") || sess.target.arch.starts_with("wasm64") {
+            // LLVM 20 updates the wasm(32|64) layout to correctly align 128 bit integers to 128 bit.
+            // See https://github.com/llvm/llvm-project/pull/119204
+            target_data_layout = target_data_layout.replace("-i128:128", "");
+        }
     }
 
     // Ensure the data-layout values hardcoded remain the defaults.
@@ -525,7 +535,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
         let (llcx, llmod) = (&*llvm_module.llcx, llvm_module.llmod());
 
         let coverage_cx =
-            tcx.sess.instrument_coverage().then(coverageinfo::CrateCoverageContext::new);
+            tcx.sess.instrument_coverage().then(coverageinfo::CguCoverageContext::new);
 
         let dbg_cx = if tcx.sess.opts.debuginfo != DebugInfo::None {
             let dctx = debuginfo::CodegenUnitDebugContext::new(llmod);
@@ -576,7 +586,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     /// Extra state that is only available when coverage instrumentation is enabled.
     #[inline]
     #[track_caller]
-    pub(crate) fn coverage_cx(&self) -> &coverageinfo::CrateCoverageContext<'ll, 'tcx> {
+    pub(crate) fn coverage_cx(&self) -> &coverageinfo::CguCoverageContext<'ll, 'tcx> {
         self.coverage_cx.as_ref().expect("only called when coverage instrumentation is enabled")
     }
 
@@ -590,6 +600,31 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             llvm::set_section(g, c"llvm.metadata");
         }
     }
+
+    pub(crate) fn get_metadata_value(&self, metadata: &'ll Metadata) -> &'ll Value {
+        unsafe { llvm::LLVMMetadataAsValue(self.llcx, metadata) }
+    }
+
+    /// Looks up the function named `name` in this module via `LLVMGetNamedFunction`.
+    pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> {
+        unsafe { llvm::LLVMGetNamedFunction(self.llmod, SmallCStr::new(name).as_ptr()) }
+    }
+
+    /// Returns the metadata kind ID that LLVM associates with `name` in this context.
+    ///
+    /// `name` is handed to LLVM as a pointer/length pair.
+    pub(crate) fn get_md_kind_id(&self, name: &str) -> u32 {
+        let name_ptr = name.as_ptr().cast::<c_char>();
+        // NOTE(review): `as c_uint` silently truncates lengths above `c_uint::MAX`.
+        let name_len = name.len() as c_uint;
+        unsafe { llvm::LLVMGetMDKindIDInContext(self.llcx, name_ptr, name_len) }
+    }
+
+    /// Creates an LLVM metadata string from `name`; this wrapper always returns `Some`.
+    pub(crate) fn create_metadata(&self, name: String) -> Option<&'ll Metadata> {
+        let name_ptr = name.as_ptr().cast::<c_char>();
+        Some(unsafe { llvm::LLVMMDStringInContext2(self.llcx, name_ptr, name.len()) })
+    }
 }
 
 impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
index 19d6726002c..b617f4d37f5 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
@@ -136,6 +136,34 @@ pub(crate) struct CoverageSpan {
     pub(crate) end_col: u32,
 }
 
+/// Holds tables of the various region types in one struct.
+///
+/// Don't pass this struct across FFI; pass the individual region tables as
+/// pointer/length pairs instead.
+///
+/// Each field name has a `_regions` suffix for improved readability after
+/// exhaustive destructuring, which ensures that all region types are handled.
+#[derive(Clone, Debug, Default)]
+pub(crate) struct Regions {
+    pub(crate) code_regions: Vec<CodeRegion>,
+    pub(crate) branch_regions: Vec<BranchRegion>,
+    pub(crate) mcdc_branch_regions: Vec<MCDCBranchRegion>,
+    pub(crate) mcdc_decision_regions: Vec<MCDCDecisionRegion>,
+}
+
+impl Regions {
+    /// Reports whether every region table in this struct is empty.
+    pub(crate) fn has_no_regions(&self) -> bool {
+        // Destructure exhaustively so that a newly added table cannot be forgotten here.
+        let Self { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
+            self;
+        let any_code_or_branch = !code_regions.is_empty() || !branch_regions.is_empty();
+        let any_mcdc = !mcdc_branch_regions.is_empty() || !mcdc_decision_regions.is_empty();
+
+        !(any_code_or_branch || any_mcdc)
+    }
+}
+
 /// Must match the layout of `LLVMRustCoverageCodeRegion`.
 #[derive(Clone, Debug)]
 #[repr(C)]
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
index 99c2d12b261..2cd7fa3225a 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
@@ -40,11 +40,10 @@ pub(crate) fn create_pgo_func_name_var<'ll>(
     }
 }
 
-pub(crate) fn write_filenames_to_buffer<'a>(
-    filenames: impl IntoIterator<Item = &'a str>,
-) -> Vec<u8> {
+pub(crate) fn write_filenames_to_buffer(filenames: &[impl AsRef<str>]) -> Vec<u8> {
     let (pointers, lengths) = filenames
         .into_iter()
+        .map(AsRef::as_ref)
         .map(|s: &str| (s.as_c_char_ptr(), s.len()))
         .unzip::<_, _, Vec<_>, Vec<_>>();
 
@@ -62,11 +61,10 @@ pub(crate) fn write_filenames_to_buffer<'a>(
 pub(crate) fn write_function_mappings_to_buffer(
     virtual_file_mapping: &[u32],
     expressions: &[ffi::CounterExpression],
-    code_regions: &[ffi::CodeRegion],
-    branch_regions: &[ffi::BranchRegion],
-    mcdc_branch_regions: &[ffi::MCDCBranchRegion],
-    mcdc_decision_regions: &[ffi::MCDCDecisionRegion],
+    regions: &ffi::Regions,
 ) -> Vec<u8> {
+    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
+        regions;
     llvm::build_byte_buffer(|buffer| unsafe {
         llvm::LLVMRustCoverageWriteFunctionMappingsToBuffer(
             virtual_file_mapping.as_ptr(),
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
deleted file mode 100644
index 95746b88ced..00000000000
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
+++ /dev/null
@@ -1,274 +0,0 @@
-use rustc_data_structures::captures::Captures;
-use rustc_data_structures::fx::FxIndexSet;
-use rustc_index::bit_set::BitSet;
-use rustc_middle::mir::coverage::{
-    CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, MappingKind, Op,
-};
-use rustc_middle::ty::Instance;
-use rustc_span::Span;
-use tracing::{debug, instrument};
-
-use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};
-
-/// Holds all of the coverage mapping data associated with a function instance,
-/// collected during traversal of `Coverage` statements in the function's MIR.
-#[derive(Debug)]
-pub(crate) struct FunctionCoverageCollector<'tcx> {
-    /// Coverage info that was attached to this function by the instrumentor.
-    function_coverage_info: &'tcx FunctionCoverageInfo,
-    is_used: bool,
-
-    /// Tracks which counters have been seen, so that we can identify mappings
-    /// to counters that were optimized out, and set them to zero.
-    counters_seen: BitSet<CounterId>,
-    /// Contains all expression IDs that have been seen in an `ExpressionUsed`
-    /// coverage statement, plus all expression IDs that aren't directly used
-    /// by any mappings (and therefore do not have expression-used statements).
-    /// After MIR traversal is finished, we can conclude that any IDs missing
-    /// from this set must have had their statements deleted by MIR opts.
-    expressions_seen: BitSet<ExpressionId>,
-}
-
-impl<'tcx> FunctionCoverageCollector<'tcx> {
-    /// Creates a new set of coverage data for a used (called) function.
-    pub(crate) fn new(
-        instance: Instance<'tcx>,
-        function_coverage_info: &'tcx FunctionCoverageInfo,
-    ) -> Self {
-        Self::create(instance, function_coverage_info, true)
-    }
-
-    /// Creates a new set of coverage data for an unused (never called) function.
-    pub(crate) fn unused(
-        instance: Instance<'tcx>,
-        function_coverage_info: &'tcx FunctionCoverageInfo,
-    ) -> Self {
-        Self::create(instance, function_coverage_info, false)
-    }
-
-    fn create(
-        instance: Instance<'tcx>,
-        function_coverage_info: &'tcx FunctionCoverageInfo,
-        is_used: bool,
-    ) -> Self {
-        let num_counters = function_coverage_info.num_counters;
-        let num_expressions = function_coverage_info.expressions.len();
-        debug!(
-            "FunctionCoverage::create(instance={instance:?}) has \
-            num_counters={num_counters}, num_expressions={num_expressions}, is_used={is_used}"
-        );
-
-        // Create a filled set of expression IDs, so that expressions not
-        // directly used by mappings will be treated as "seen".
-        // (If they end up being unused, LLVM will delete them for us.)
-        let mut expressions_seen = BitSet::new_filled(num_expressions);
-        // For each expression ID that is directly used by one or more mappings,
-        // mark it as not-yet-seen. This indicates that we expect to see a
-        // corresponding `ExpressionUsed` statement during MIR traversal.
-        for mapping in function_coverage_info.mappings.iter() {
-            // Currently we only worry about ordinary code mappings.
-            // For branch and MC/DC mappings, expressions might not correspond
-            // to any particular point in the control-flow graph.
-            // (Keep this in sync with the injection of `ExpressionUsed`
-            // statements in the `InstrumentCoverage` MIR pass.)
-            if let MappingKind::Code(term) = mapping.kind
-                && let CovTerm::Expression(id) = term
-            {
-                expressions_seen.remove(id);
-            }
-        }
-
-        Self {
-            function_coverage_info,
-            is_used,
-            counters_seen: BitSet::new_empty(num_counters),
-            expressions_seen,
-        }
-    }
-
-    /// Marks a counter ID as having been seen in a counter-increment statement.
-    #[instrument(level = "debug", skip(self))]
-    pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) {
-        self.counters_seen.insert(id);
-    }
-
-    /// Marks an expression ID as having been seen in an expression-used statement.
-    #[instrument(level = "debug", skip(self))]
-    pub(crate) fn mark_expression_id_seen(&mut self, id: ExpressionId) {
-        self.expressions_seen.insert(id);
-    }
-
-    /// Identify expressions that will always have a value of zero, and note
-    /// their IDs in [`ZeroExpressions`]. Mappings that refer to a zero expression
-    /// can instead become mappings to a constant zero value.
-    ///
-    /// This method mainly exists to preserve the simplifications that were
-    /// already being performed by the Rust-side expression renumbering, so that
-    /// the resulting coverage mappings don't get worse.
-    fn identify_zero_expressions(&self) -> ZeroExpressions {
-        // The set of expressions that either were optimized out entirely, or
-        // have zero as both of their operands, and will therefore always have
-        // a value of zero. Other expressions that refer to these as operands
-        // can have those operands replaced with `CovTerm::Zero`.
-        let mut zero_expressions = ZeroExpressions::default();
-
-        // Simplify a copy of each expression based on lower-numbered expressions,
-        // and then update the set of always-zero expressions if necessary.
-        // (By construction, expressions can only refer to other expressions
-        // that have lower IDs, so one pass is sufficient.)
-        for (id, expression) in self.function_coverage_info.expressions.iter_enumerated() {
-            if !self.expressions_seen.contains(id) {
-                // If an expression was not seen, it must have been optimized away,
-                // so any operand that refers to it can be replaced with zero.
-                zero_expressions.insert(id);
-                continue;
-            }
-
-            // We don't need to simplify the actual expression data in the
-            // expressions list; we can just simplify a temporary copy and then
-            // use that to update the set of always-zero expressions.
-            let Expression { mut lhs, op, mut rhs } = *expression;
-
-            // If an expression has an operand that is also an expression, the
-            // operand's ID must be strictly lower. This is what lets us find
-            // all zero expressions in one pass.
-            let assert_operand_expression_is_lower = |operand_id: ExpressionId| {
-                assert!(
-                    operand_id < id,
-                    "Operand {operand_id:?} should be less than {id:?} in {expression:?}",
-                )
-            };
-
-            // If an operand refers to a counter or expression that is always
-            // zero, then that operand can be replaced with `CovTerm::Zero`.
-            let maybe_set_operand_to_zero = |operand: &mut CovTerm| {
-                if let CovTerm::Expression(id) = *operand {
-                    assert_operand_expression_is_lower(id);
-                }
-
-                if is_zero_term(&self.counters_seen, &zero_expressions, *operand) {
-                    *operand = CovTerm::Zero;
-                }
-            };
-            maybe_set_operand_to_zero(&mut lhs);
-            maybe_set_operand_to_zero(&mut rhs);
-
-            // Coverage counter values cannot be negative, so if an expression
-            // involves subtraction from zero, assume that its RHS must also be zero.
-            // (Do this after simplifications that could set the LHS to zero.)
-            if lhs == CovTerm::Zero && op == Op::Subtract {
-                rhs = CovTerm::Zero;
-            }
-
-            // After the above simplifications, if both operands are zero, then
-            // we know that this expression is always zero too.
-            if lhs == CovTerm::Zero && rhs == CovTerm::Zero {
-                zero_expressions.insert(id);
-            }
-        }
-
-        zero_expressions
-    }
-
-    pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> {
-        let zero_expressions = self.identify_zero_expressions();
-        let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } = self;
-
-        FunctionCoverage { function_coverage_info, is_used, counters_seen, zero_expressions }
-    }
-}
-
-pub(crate) struct FunctionCoverage<'tcx> {
-    pub(crate) function_coverage_info: &'tcx FunctionCoverageInfo,
-    is_used: bool,
-
-    counters_seen: BitSet<CounterId>,
-    zero_expressions: ZeroExpressions,
-}
-
-impl<'tcx> FunctionCoverage<'tcx> {
-    /// Returns true for a used (called) function, and false for an unused function.
-    pub(crate) fn is_used(&self) -> bool {
-        self.is_used
-    }
-
-    /// Return the source hash, generated from the HIR node structure, and used to indicate whether
-    /// or not the source code structure changed between different compilations.
-    pub(crate) fn source_hash(&self) -> u64 {
-        if self.is_used { self.function_coverage_info.function_source_hash } else { 0 }
-    }
-
-    /// Convert this function's coverage expression data into a form that can be
-    /// passed through FFI to LLVM.
-    pub(crate) fn counter_expressions(
-        &self,
-    ) -> impl Iterator<Item = CounterExpression> + ExactSizeIterator + Captures<'_> {
-        // We know that LLVM will optimize out any unused expressions before
-        // producing the final coverage map, so there's no need to do the same
-        // thing on the Rust side unless we're confident we can do much better.
-        // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.)
-
-        self.function_coverage_info.expressions.iter().map(move |&Expression { lhs, op, rhs }| {
-            CounterExpression {
-                lhs: self.counter_for_term(lhs),
-                kind: match op {
-                    Op::Add => ExprKind::Add,
-                    Op::Subtract => ExprKind::Subtract,
-                },
-                rhs: self.counter_for_term(rhs),
-            }
-        })
-    }
-
-    /// Yields all this function's coverage mappings, after simplifying away
-    /// unused counters and counter expressions.
-    pub(crate) fn mapping_spans(
-        &self,
-    ) -> impl Iterator<Item = (MappingKind, Span)> + ExactSizeIterator + Captures<'_> {
-        self.function_coverage_info.mappings.iter().map(move |mapping| {
-            let &Mapping { ref kind, span } = mapping;
-            let kind =
-                kind.map_terms(|term| if self.is_zero_term(term) { CovTerm::Zero } else { term });
-            (kind, span)
-        })
-    }
-
-    fn counter_for_term(&self, term: CovTerm) -> Counter {
-        if self.is_zero_term(term) { Counter::ZERO } else { Counter::from_term(term) }
-    }
-
-    fn is_zero_term(&self, term: CovTerm) -> bool {
-        is_zero_term(&self.counters_seen, &self.zero_expressions, term)
-    }
-}
-
-/// Set of expression IDs that are known to always evaluate to zero.
-/// Any mapping or expression operand that refers to these expressions can have
-/// that reference replaced with a constant zero value.
-#[derive(Default)]
-struct ZeroExpressions(FxIndexSet<ExpressionId>);
-
-impl ZeroExpressions {
-    fn insert(&mut self, id: ExpressionId) {
-        self.0.insert(id);
-    }
-
-    fn contains(&self, id: ExpressionId) -> bool {
-        self.0.contains(&id)
-    }
-}
-
-/// Returns `true` if the given term is known to have a value of zero, taking
-/// into account knowledge of which counters are unused and which expressions
-/// are always zero.
-fn is_zero_term(
-    counters_seen: &BitSet<CounterId>,
-    zero_expressions: &ZeroExpressions,
-    term: CovTerm,
-) -> bool {
-    match term {
-        CovTerm::Zero => true,
-        CovTerm::Counter(id) => !counters_seen.contains(id),
-        CovTerm::Expression(id) => zero_expressions.contains(id),
-    }
-}
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index ed881418cb0..b3ad2a0e409 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -1,9 +1,6 @@
-mod spans;
-
-use std::ffi::CString;
 use std::sync::Arc;
 
-use itertools::Itertools as _;
+use itertools::Itertools;
 use rustc_abi::Align;
 use rustc_codegen_ssa::traits::{
     BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
@@ -11,21 +8,22 @@ use rustc_codegen_ssa::traits::{
 use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_index::IndexVec;
-use rustc_middle::mir::coverage::MappingKind;
+use rustc_middle::mir;
 use rustc_middle::ty::{self, TyCtxt};
-use rustc_middle::{bug, mir};
 use rustc_session::RemapFileNameExt;
 use rustc_session::config::RemapPathScopeComponents;
 use rustc_span::def_id::DefIdSet;
 use rustc_span::{SourceFile, StableSourceFileId};
-use rustc_target::spec::HasTargetSpec;
 use tracing::debug;
 
 use crate::common::CodegenCx;
-use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector};
-use crate::coverageinfo::{ffi, llvm_cov};
+use crate::coverageinfo::llvm_cov;
+use crate::coverageinfo::mapgen::covfun::prepare_covfun_record;
 use crate::llvm;
 
+mod covfun;
+mod spans;
+
 /// Generates and exports the coverage map, which is embedded in special
 /// linker sections in the final binary.
 ///
@@ -51,76 +49,62 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
 
     debug!("Generating coverage map for CodegenUnit: `{}`", cx.codegen_unit.name());
 
-    // In order to show that unused functions have coverage counts of zero (0), LLVM requires the
-    // functions exist. Generate synthetic functions with a (required) single counter, and add the
-    // MIR `Coverage` code regions to the `function_coverage_map`, before calling
-    // `ctx.take_function_coverage_map()`.
-    if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
-        add_unused_functions(cx);
-    }
-
     // FIXME(#132395): Can this be none even when coverage is enabled?
-    let function_coverage_map = match cx.coverage_cx {
-        Some(ref cx) => cx.take_function_coverage_map(),
+    let instances_used = match cx.coverage_cx {
+        Some(ref cx) => cx.instances_used.borrow(),
         None => return,
     };
-    if function_coverage_map.is_empty() {
-        // This module has no functions with coverage instrumentation
-        return;
-    }
 
-    let function_coverage_entries = function_coverage_map
-        .into_iter()
-        .map(|(instance, function_coverage)| (instance, function_coverage.into_finished()))
-        .collect::<Vec<_>>();
+    // The order of entries in this global file table needs to be deterministic,
+    // and ideally should also be independent of the details of stable-hashing,
+    // because coverage test snapshots (`.cov-map`) can observe the order and
+    // would need to be re-blessed if it changes. As long as those requirements
+    // are satisfied, the order can be arbitrary.
+    let mut global_file_table = GlobalFileTable::new();
 
-    let all_files = function_coverage_entries
+    let mut covfun_records = instances_used
         .iter()
-        .map(|(_, fn_cov)| fn_cov.function_coverage_info.body_span)
-        .map(|span| tcx.sess.source_map().lookup_source_file(span.lo()));
-    let global_file_table = GlobalFileTable::new(all_files);
-
-    // Encode all filenames referenced by coverage mappings in this CGU.
-    let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
-
-    let filenames_size = filenames_buffer.len();
-    let filenames_val = cx.const_bytes(&filenames_buffer);
-    let filenames_ref = llvm_cov::hash_bytes(&filenames_buffer);
-
-    // Generate the coverage map header, which contains the filenames used by
-    // this CGU's coverage mappings, and store it in a well-known global.
-    generate_covmap_record(cx, covmap_version, filenames_size, filenames_val);
-
-    let mut unused_function_names = Vec::new();
+        .copied()
+        // Sort by symbol name, so that the global file table is built in an
+        // order that doesn't depend on the stable-hash-based order in which
+        // instances were visited during codegen.
+        .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name)
+        .filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true))
+        .collect::<Vec<_>>();
 
-    // Encode coverage mappings and generate function records
-    for (instance, function_coverage) in function_coverage_entries {
-        debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
+    // In a single designated CGU, also prepare covfun records for functions
+    // in this crate that were instrumented for coverage, but are unused.
+    if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
+        let mut unused_instances = gather_unused_function_instances(cx);
+        // Sort the unused instances by symbol name, for the same reason as the used ones.
+        unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name);
+        covfun_records.extend(unused_instances.into_iter().filter_map(|instance| {
+            prepare_covfun_record(tcx, &mut global_file_table, instance, false)
+        }));
+    }
 
-        let mangled_function_name = tcx.symbol_name(instance).name;
-        let source_hash = function_coverage.source_hash();
-        let is_used = function_coverage.is_used();
+    // If there are no covfun records for this CGU, don't generate a covmap record.
+    // Emitting a covmap record without any covfun records causes `llvm-cov` to
+    // fail when generating coverage reports, and if there are no covfun records
+    // then the covmap record isn't useful anyway.
+    // This should prevent a repeat of <https://github.com/rust-lang/rust/issues/133606>.
+    if covfun_records.is_empty() {
+        return;
+    }
 
-        let coverage_mapping_buffer =
-            encode_mappings_for_function(tcx, &global_file_table, &function_coverage);
+    // Encode all filenames referenced by coverage mappings in this CGU.
+    let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
+    // The `llvm-cov` tool uses this hash to associate each covfun record with
+    // its corresponding filenames table, since the final binary will typically
+    // contain multiple covmap records from different compilation units.
+    let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
 
-        if coverage_mapping_buffer.is_empty() {
-            debug!("function has no mappings to embed; skipping");
-            continue;
-        }
+    let mut unused_function_names = vec![];
 
-        if !is_used {
-            unused_function_names.push(mangled_function_name);
-        }
+    for covfun in &covfun_records {
+        unused_function_names.extend(covfun.mangled_function_name_if_unused());
 
-        generate_covfun_record(
-            cx,
-            mangled_function_name,
-            source_hash,
-            filenames_ref,
-            coverage_mapping_buffer,
-            is_used,
-        );
+        covfun::generate_covfun_record(cx, filenames_hash, covfun)
     }
 
     // For unused functions, we need to take their mangled names and store them
@@ -141,6 +125,11 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
         llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
         llvm::set_initializer(array, initializer);
     }
+
+    // Generate the coverage map header, which contains the filenames used by
+    // this CGU's coverage mappings, and store it in a well-known global.
+    // (This is skipped if we returned early due to having no covfun records.)
+    generate_covmap_record(cx, covmap_version, &filenames_buffer);
 }
 
 /// Maps "global" (per-CGU) file ID numbers to their underlying source files.
@@ -151,27 +140,16 @@ struct GlobalFileTable {
 }
 
 impl GlobalFileTable {
-    fn new(all_files: impl IntoIterator<Item = Arc<SourceFile>>) -> Self {
-        // Collect all of the files into a set. Files usually come in contiguous
-        // runs, so we can dedup adjacent ones to save work.
-        let mut raw_file_table = all_files
-            .into_iter()
-            .dedup_by(|a, b| a.stable_id == b.stable_id)
-            .map(|f| (f.stable_id, f))
-            .collect::<FxIndexMap<StableSourceFileId, Arc<SourceFile>>>();
-
-        // Sort the file table by its underlying filenames.
-        raw_file_table.sort_unstable_by(|_, a, _, b| {
-            Ord::cmp(&a.name, &b.name).then_with(|| Ord::cmp(&a.stable_id, &b.stable_id))
-        });
-
-        Self { raw_file_table }
+    fn new() -> Self {
+        Self { raw_file_table: FxIndexMap::default() }
     }
 
-    fn global_file_id_for_file(&self, file: &SourceFile) -> GlobalFileId {
-        let raw_id = self.raw_file_table.get_index_of(&file.stable_id).unwrap_or_else(|| {
-            bug!("file not found in prepared global file table: {:?}", file.name);
-        });
+    fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId {
+        // Ensure the given file has a table entry, and get its index.
+        let entry = self.raw_file_table.entry(file.stable_id);
+        let raw_id = entry.index();
+        entry.or_insert_with(|| Arc::clone(file));
+
         // The raw file table doesn't include an entry for the working dir
         // (which has ID 0), so add 1 to get the correct ID.
         GlobalFileId::from_usize(raw_id + 1)
@@ -198,7 +176,7 @@ impl GlobalFileTable {
             file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
         }));
 
-        llvm_cov::write_filenames_to_buffer(table.iter().map(|f| f.as_ref()))
+        llvm_cov::write_filenames_to_buffer(&table)
     }
 }
 
@@ -216,7 +194,7 @@ rustc_index::newtype_index! {
 
 /// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
 /// file IDs.
-#[derive(Default)]
+#[derive(Debug, Default)]
 struct VirtualFileMapping {
     local_to_global: IndexVec<LocalFileId, GlobalFileId>,
     global_to_local: FxIndexMap<GlobalFileId, LocalFileId>,
@@ -230,187 +208,45 @@ impl VirtualFileMapping {
             .or_insert_with(|| self.local_to_global.push(global_file_id))
     }
 
-    fn into_vec(self) -> Vec<u32> {
-        // This conversion should be optimized away to ~zero overhead.
-        // In any case, it's probably not hot enough to worry about.
-        self.local_to_global.into_iter().map(|global| global.as_u32()).collect()
-    }
-}
-
-/// Using the expressions and counter regions collected for a single function,
-/// generate the variable-sized payload of its corresponding `__llvm_covfun`
-/// entry. The payload is returned as a vector of bytes.
-///
-/// Newly-encountered filenames will be added to the global file table.
-fn encode_mappings_for_function(
-    tcx: TyCtxt<'_>,
-    global_file_table: &GlobalFileTable,
-    function_coverage: &FunctionCoverage<'_>,
-) -> Vec<u8> {
-    let mapping_spans = function_coverage.mapping_spans();
-    if mapping_spans.is_empty() {
-        return Vec::new();
-    }
-
-    let fn_cov_info = function_coverage.function_coverage_info;
-
-    let expressions = function_coverage.counter_expressions().collect::<Vec<_>>();
-
-    let mut virtual_file_mapping = VirtualFileMapping::default();
-    let mut code_regions = vec![];
-    let mut branch_regions = vec![];
-    let mut mcdc_branch_regions = vec![];
-    let mut mcdc_decision_regions = vec![];
-
-    // Currently a function's mappings must all be in the same file as its body span.
-    let source_map = tcx.sess.source_map();
-    let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo());
-
-    // Look up the global file ID for that file.
-    let global_file_id = global_file_table.global_file_id_for_file(&source_file);
-
-    // Associate that global file ID with a local file ID for this function.
-    let local_file_id = virtual_file_mapping.local_id_for_global(global_file_id);
-
-    let make_cov_span = |span| {
-        spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span)
-    };
-
-    // For each coverage mapping span in this function+file, convert it to a
-    // form suitable for FFI.
-    for (mapping_kind, span) in mapping_spans {
-        debug!("Adding counter {mapping_kind:?} to map for {span:?}");
-        let Some(cov_span) = make_cov_span(span) else { continue };
-        match mapping_kind {
-            MappingKind::Code(term) => {
-                code_regions
-                    .push(ffi::CodeRegion { cov_span, counter: ffi::Counter::from_term(term) });
-            }
-            MappingKind::Branch { true_term, false_term } => {
-                branch_regions.push(ffi::BranchRegion {
-                    cov_span,
-                    true_counter: ffi::Counter::from_term(true_term),
-                    false_counter: ffi::Counter::from_term(false_term),
-                });
-            }
-            MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => {
-                mcdc_branch_regions.push(ffi::MCDCBranchRegion {
-                    cov_span,
-                    true_counter: ffi::Counter::from_term(true_term),
-                    false_counter: ffi::Counter::from_term(false_term),
-                    mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params),
-                });
-            }
-            MappingKind::MCDCDecision(mcdc_decision_params) => {
-                mcdc_decision_regions.push(ffi::MCDCDecisionRegion {
-                    cov_span,
-                    mcdc_decision_params: ffi::mcdc::DecisionParameters::from(mcdc_decision_params),
-                });
-            }
-        }
+    fn to_vec(&self) -> Vec<u32> {
+        // This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`,
+        // but it isn't hot or expensive enough to justify the extra unsafety.
+        self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect()
     }
-
-    // Encode the function's coverage mappings into a buffer.
-    llvm_cov::write_function_mappings_to_buffer(
-        &virtual_file_mapping.into_vec(),
-        &expressions,
-        &code_regions,
-        &branch_regions,
-        &mcdc_branch_regions,
-        &mcdc_decision_regions,
-    )
 }
 
 /// Generates the contents of the covmap record for this CGU, which mostly
 /// consists of a header and a list of filenames. The record is then stored
 /// as a global variable in the `__llvm_covmap` section.
-fn generate_covmap_record<'ll>(
-    cx: &CodegenCx<'ll, '_>,
-    version: u32,
-    filenames_size: usize,
-    filenames_val: &'ll llvm::Value,
-) {
-    debug!("cov map: filenames_size = {}, 0-based version = {}", filenames_size, version);
-
-    // Create the coverage data header (Note, fields 0 and 2 are now always zero,
-    // as of `llvm::coverage::CovMapVersion::Version4`.)
-    let zero_was_n_records_val = cx.const_u32(0);
-    let filenames_size_val = cx.const_u32(filenames_size as u32);
-    let zero_was_coverage_size_val = cx.const_u32(0);
-    let version_val = cx.const_u32(version);
-    let cov_data_header_val = cx.const_struct(
-        &[zero_was_n_records_val, filenames_size_val, zero_was_coverage_size_val, version_val],
-        /*packed=*/ false,
-    );
-
-    // Create the complete LLVM coverage data value to add to the LLVM IR
-    let covmap_data =
-        cx.const_struct(&[cov_data_header_val, filenames_val], /*packed=*/ false);
-
-    let llglobal = llvm::add_global(cx.llmod, cx.val_ty(covmap_data), &llvm_cov::covmap_var_name());
-    llvm::set_initializer(llglobal, covmap_data);
-    llvm::set_global_constant(llglobal, true);
-    llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage);
-    llvm::set_section(llglobal, &llvm_cov::covmap_section_name(cx.llmod));
-    // LLVM's coverage mapping format specifies 8-byte alignment for items in this section.
-    // <https://llvm.org/docs/CoverageMappingFormat.html>
-    llvm::set_alignment(llglobal, Align::EIGHT);
-    cx.add_used_global(llglobal);
-}
-
-/// Generates the contents of the covfun record for this function, which
-/// contains the function's coverage mapping data. The record is then stored
-/// as a global variable in the `__llvm_covfun` section.
-fn generate_covfun_record(
-    cx: &CodegenCx<'_, '_>,
-    mangled_function_name: &str,
-    source_hash: u64,
-    filenames_ref: u64,
-    coverage_mapping_buffer: Vec<u8>,
-    is_used: bool,
-) {
-    // Concatenate the encoded coverage mappings
-    let coverage_mapping_size = coverage_mapping_buffer.len();
-    let coverage_mapping_val = cx.const_bytes(&coverage_mapping_buffer);
-
-    let func_name_hash = llvm_cov::hash_bytes(mangled_function_name.as_bytes());
-    let func_name_hash_val = cx.const_u64(func_name_hash);
-    let coverage_mapping_size_val = cx.const_u32(coverage_mapping_size as u32);
-    let source_hash_val = cx.const_u64(source_hash);
-    let filenames_ref_val = cx.const_u64(filenames_ref);
-    let func_record_val = cx.const_struct(
+fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_buffer: &[u8]) {
+    // A covmap record consists of four target-endian u32 values, followed by
+    // the encoded filenames table. Two of the header fields are unused in
+    // modern versions of the LLVM coverage mapping format, and are always 0.
+    // <https://llvm.org/docs/CoverageMappingFormat.html#llvm-ir-representation>
+    // See also `src/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp`.
+    let covmap_header = cx.const_struct(
         &[
-            func_name_hash_val,
-            coverage_mapping_size_val,
-            source_hash_val,
-            filenames_ref_val,
-            coverage_mapping_val,
+            cx.const_u32(0), // (unused)
+            cx.const_u32(filenames_buffer.len() as u32),
+            cx.const_u32(0), // (unused)
+            cx.const_u32(version),
         ],
-        /*packed=*/ true,
+        /* packed */ false,
     );
-
-    // Choose a variable name to hold this function's covfun data.
-    // Functions that are used have a suffix ("u") to distinguish them from
-    // unused copies of the same function (from different CGUs), so that if a
-    // linker sees both it won't discard the used copy's data.
-    let func_record_var_name =
-        CString::new(format!("__covrec_{:X}{}", func_name_hash, if is_used { "u" } else { "" }))
-            .unwrap();
-    debug!("function record var name: {:?}", func_record_var_name);
-
-    let llglobal = llvm::add_global(cx.llmod, cx.val_ty(func_record_val), &func_record_var_name);
-    llvm::set_initializer(llglobal, func_record_val);
-    llvm::set_global_constant(llglobal, true);
-    llvm::set_linkage(llglobal, llvm::Linkage::LinkOnceODRLinkage);
-    llvm::set_visibility(llglobal, llvm::Visibility::Hidden);
-    llvm::set_section(llglobal, cx.covfun_section_name());
+    let covmap_record = cx
+        .const_struct(&[covmap_header, cx.const_bytes(filenames_buffer)], /* packed */ false);
+
+    let covmap_global =
+        llvm::add_global(cx.llmod, cx.val_ty(covmap_record), &llvm_cov::covmap_var_name());
+    llvm::set_initializer(covmap_global, covmap_record);
+    llvm::set_global_constant(covmap_global, true);
+    llvm::set_linkage(covmap_global, llvm::Linkage::PrivateLinkage);
+    llvm::set_section(covmap_global, &llvm_cov::covmap_section_name(cx.llmod));
     // LLVM's coverage mapping format specifies 8-byte alignment for items in this section.
     // <https://llvm.org/docs/CoverageMappingFormat.html>
-    llvm::set_alignment(llglobal, Align::EIGHT);
-    if cx.target_spec().supports_comdat() {
-        llvm::set_comdat(cx.llmod, llglobal, &func_record_var_name);
-    }
-    cx.add_used_global(llglobal);
+    llvm::set_alignment(covmap_global, Align::EIGHT);
+
+    cx.add_used_global(covmap_global);
 }
 
 /// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
@@ -421,39 +257,35 @@ fn generate_covfun_record(
 /// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
 /// We also end up adding their symbol names to a special global array that LLVM will include in
 /// its embedded coverage data.
-fn add_unused_functions(cx: &CodegenCx<'_, '_>) {
+fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<ty::Instance<'tcx>> {
     assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
 
     let tcx = cx.tcx;
     let usage = prepare_usage_sets(tcx);
 
     let is_unused_fn = |def_id: LocalDefId| -> bool {
-        let def_id = def_id.to_def_id();
-
-        // To be eligible for "unused function" mappings, a definition must:
-        // - Be function-like
+        // Usage sets expect `DefId`, so convert from `LocalDefId`.
+        let d: DefId = LocalDefId::to_def_id(def_id);
+        // To be potentially eligible for "unused function" mappings, a definition must:
+        // - Be eligible for coverage instrumentation
         // - Not participate directly in codegen (or have lost all its coverage statements)
         // - Not have any coverage statements inlined into codegenned functions
-        tcx.def_kind(def_id).is_fn_like()
-            && (!usage.all_mono_items.contains(&def_id)
-                || usage.missing_own_coverage.contains(&def_id))
-            && !usage.used_via_inlining.contains(&def_id)
+        tcx.is_eligible_for_coverage(def_id)
+            && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
+            && !usage.used_via_inlining.contains(&d)
     };
 
-    // Scan for unused functions that were instrumented for coverage.
-    for def_id in tcx.mir_keys(()).iter().copied().filter(|&def_id| is_unused_fn(def_id)) {
-        // Get the coverage info from MIR, skipping functions that were never instrumented.
-        let body = tcx.optimized_mir(def_id);
-        let Some(function_coverage_info) = body.function_coverage_info.as_deref() else { continue };
-
-        // FIXME(79651): Consider trying to filter out dummy instantiations of
-        // unused generic functions from library crates, because they can produce
-        // "unused instantiation" in coverage reports even when they are actually
-        // used by some downstream crate in the same binary.
+    // FIXME(#79651): Consider trying to filter out dummy instantiations of
+    // unused generic functions from library crates, because they can produce
+    // "unused instantiation" in coverage reports even when they are actually
+    // used by some downstream crate in the same binary.
 
-        debug!("generating unused fn: {def_id:?}");
-        add_unused_function_coverage(cx, def_id, function_coverage_info);
-    }
+    tcx.mir_keys(())
+        .iter()
+        .copied()
+        .filter(|&def_id| is_unused_fn(def_id))
+        .map(|def_id| make_dummy_instance(tcx, def_id))
+        .collect::<Vec<_>>()
 }
 
 struct UsageSets<'tcx> {
@@ -518,16 +350,11 @@ fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
     UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
 }
 
-fn add_unused_function_coverage<'tcx>(
-    cx: &CodegenCx<'_, 'tcx>,
-    def_id: LocalDefId,
-    function_coverage_info: &'tcx mir::coverage::FunctionCoverageInfo,
-) {
-    let tcx = cx.tcx;
-    let def_id = def_id.to_def_id();
+fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
+    let def_id = local_def_id.to_def_id();
 
     // Make a dummy instance that fills in all generics with placeholders.
-    let instance = ty::Instance::new(
+    ty::Instance::new(
         def_id,
         ty::GenericArgs::for_item(tcx, def_id, |param, _| {
             if let ty::GenericParamDefKind::Lifetime = param.kind {
@@ -536,11 +363,5 @@ fn add_unused_function_coverage<'tcx>(
                 tcx.mk_param_from_def(param)
             }
         }),
-    );
-
-    // An unused function's mappings will automatically be rewritten to map to
-    // zero, because none of its counters/expressions are marked as seen.
-    let function_coverage = FunctionCoverageCollector::unused(instance, function_coverage_info);
-
-    cx.coverage_cx().function_coverage_map.borrow_mut().insert(instance, function_coverage);
+    )
 }
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
new file mode 100644
index 00000000000..5428d776f41
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -0,0 +1,252 @@
+//! For each function that was instrumented for coverage, we need to embed its
+//! corresponding coverage mapping metadata inside the `__llvm_covfun`[^win]
+//! linker section of the final binary.
+//!
+//! [^win]: On Windows the section name is `.lcovfun`.
+
+use std::ffi::CString;
+
+use rustc_abi::Align;
+use rustc_codegen_ssa::traits::{
+    BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
+};
+use rustc_middle::mir::coverage::{
+    CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping, MappingKind, Op,
+};
+use rustc_middle::ty::{Instance, TyCtxt};
+use rustc_span::Span;
+use rustc_target::spec::HasTargetSpec;
+use tracing::debug;
+
+use crate::common::CodegenCx;
+use crate::coverageinfo::mapgen::{GlobalFileTable, VirtualFileMapping, spans};
+use crate::coverageinfo::{ffi, llvm_cov};
+use crate::llvm;
+
+/// Intermediate coverage metadata for a single function, used to help build
+/// the final record that will be embedded in the `__llvm_covfun` section.
+#[derive(Debug)]
+pub(crate) struct CovfunRecord<'tcx> {
+    mangled_function_name: &'tcx str,
+    source_hash: u64,
+    is_used: bool,
+
+    virtual_file_mapping: VirtualFileMapping,
+    expressions: Vec<ffi::CounterExpression>,
+    regions: ffi::Regions,
+}
+
+impl<'tcx> CovfunRecord<'tcx> {
+    /// FIXME(Zalathar): Make this the responsibility of the code that determines
+    /// which functions are unused.
+    pub(crate) fn mangled_function_name_if_unused(&self) -> Option<&'tcx str> {
+        (!self.is_used).then_some(self.mangled_function_name)
+    }
+}
+
+pub(crate) fn prepare_covfun_record<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    global_file_table: &mut GlobalFileTable,
+    instance: Instance<'tcx>,
+    is_used: bool,
+) -> Option<CovfunRecord<'tcx>> {
+    let fn_cov_info = tcx.instance_mir(instance.def).function_coverage_info.as_deref()?;
+    let ids_info = tcx.coverage_ids_info(instance.def);
+
+    let expressions = prepare_expressions(fn_cov_info, ids_info, is_used);
+
+    let mut covfun = CovfunRecord {
+        mangled_function_name: tcx.symbol_name(instance).name,
+        source_hash: if is_used { fn_cov_info.function_source_hash } else { 0 },
+        is_used,
+        virtual_file_mapping: VirtualFileMapping::default(),
+        expressions,
+        regions: ffi::Regions::default(),
+    };
+
+    fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun);
+
+    if covfun.regions.has_no_regions() {
+        debug!(?covfun, "function has no mappings to embed; skipping");
+        return None;
+    }
+
+    Some(covfun)
+}
+
+/// Convert the function's coverage-counter expressions into a form suitable for FFI.
+fn prepare_expressions(
+    fn_cov_info: &FunctionCoverageInfo,
+    ids_info: &CoverageIdsInfo,
+    is_used: bool,
+) -> Vec<ffi::CounterExpression> {
+    // If the function is unused, replace every term with zero; otherwise
+    // replace only the terms that `ids_info` reports as zero (removed by MIR opts).
+    let counter_for_term = |term| {
+        if !is_used || ids_info.is_zero_term(term) {
+            ffi::Counter::ZERO
+        } else {
+            ffi::Counter::from_term(term)
+        }
+    };
+
+    // We know that LLVM will optimize out any unused expressions before
+    // producing the final coverage map, so there's no need to do the same
+    // thing on the Rust side unless we're confident we can do much better.
+    // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.)
+    fn_cov_info
+        .expressions
+        .iter()
+        .map(move |&Expression { lhs, op, rhs }| ffi::CounterExpression {
+            lhs: counter_for_term(lhs),
+            kind: match op {
+                Op::Add => ffi::ExprKind::Add,
+                Op::Subtract => ffi::ExprKind::Subtract,
+            },
+            rhs: counter_for_term(rhs),
+        })
+        .collect::<Vec<_>>()
+}
+
+/// Populates the mapping region tables in the current function's covfun record.
+fn fill_region_tables<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    global_file_table: &mut GlobalFileTable,
+    fn_cov_info: &'tcx FunctionCoverageInfo,
+    ids_info: &'tcx CoverageIdsInfo,
+    covfun: &mut CovfunRecord<'tcx>,
+) {
+    // Currently a function's mappings must all be in the same file as its body span.
+    let source_map = tcx.sess.source_map();
+    let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo());
+
+    // Look up the global file ID for that file.
+    let global_file_id = global_file_table.global_file_id_for_file(&source_file);
+
+    // Associate that global file ID with a local file ID for this function.
+    let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
+
+    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
+        &mut covfun.regions;
+
+    let make_cov_span = |span: Span| {
+        spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span)
+    };
+    let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen();
+
+    // For each counter/region pair in this function+file, convert it to a
+    // form suitable for FFI.
+    let is_zero_term = |term| !covfun.is_used || ids_info.is_zero_term(term);
+    for &Mapping { ref kind, span } in &fn_cov_info.mappings {
+        // If the function is unused, or the mapping refers to counters/expressions
+        // that were removed by MIR opts, replace those terms with zero.
+        let kind = kind.map_terms(|term| if is_zero_term(term) { CovTerm::Zero } else { term });
+
+        // Convert the `Span` into coordinates that we can pass to LLVM, or
+        // discard the span if conversion fails. In rare cases, _all_ of a
+        // function's spans are discarded, and the rest of coverage codegen
+        // needs to handle that gracefully to avoid a repeat of #133606.
+        // We don't have a good test case for triggering that organically, so
+        // instead we set `-Zcoverage-options=discard-all-spans-in-codegen`
+        // to force it to occur.
+        let Some(cov_span) = make_cov_span(span) else { continue };
+        if discard_all {
+            continue;
+        }
+
+        match kind {
+            MappingKind::Code(term) => {
+                code_regions
+                    .push(ffi::CodeRegion { cov_span, counter: ffi::Counter::from_term(term) });
+            }
+            MappingKind::Branch { true_term, false_term } => {
+                branch_regions.push(ffi::BranchRegion {
+                    cov_span,
+                    true_counter: ffi::Counter::from_term(true_term),
+                    false_counter: ffi::Counter::from_term(false_term),
+                });
+            }
+            MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => {
+                mcdc_branch_regions.push(ffi::MCDCBranchRegion {
+                    cov_span,
+                    true_counter: ffi::Counter::from_term(true_term),
+                    false_counter: ffi::Counter::from_term(false_term),
+                    mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params),
+                });
+            }
+            MappingKind::MCDCDecision(mcdc_decision_params) => {
+                mcdc_decision_regions.push(ffi::MCDCDecisionRegion {
+                    cov_span,
+                    mcdc_decision_params: ffi::mcdc::DecisionParameters::from(mcdc_decision_params),
+                });
+            }
+        }
+    }
+}
+
+/// Generates the contents of the covfun record for this function, which
+/// contains the function's coverage mapping data. The record is then stored
+/// as a global variable in the `__llvm_covfun` section.
+pub(crate) fn generate_covfun_record<'tcx>(
+    cx: &CodegenCx<'_, 'tcx>,
+    filenames_hash: u64,
+    covfun: &CovfunRecord<'tcx>,
+) {
+    let &CovfunRecord {
+        mangled_function_name,
+        source_hash,
+        is_used,
+        ref virtual_file_mapping,
+        ref expressions,
+        ref regions,
+    } = covfun;
+
+    // Encode the function's coverage mappings into a buffer.
+    let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer(
+        &virtual_file_mapping.to_vec(),
+        expressions,
+        regions,
+    );
+
+    // A covfun record consists of four target-endian integers, followed by the
+    // encoded mapping data in bytes. Note that the length field is 32 bits.
+    // <https://llvm.org/docs/CoverageMappingFormat.html#llvm-ir-representation>
+    // See also `src/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp` and
+    // `COVMAP_V3` in `src/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc`.
+    let func_name_hash = llvm_cov::hash_bytes(mangled_function_name.as_bytes());
+    let covfun_record = cx.const_struct(
+        &[
+            cx.const_u64(func_name_hash),
+            cx.const_u32(coverage_mapping_buffer.len() as u32),
+            cx.const_u64(source_hash),
+            cx.const_u64(filenames_hash),
+            cx.const_bytes(&coverage_mapping_buffer),
+        ],
+        // This struct needs to be packed, so that the 32-bit length field
+        // doesn't have unexpected padding.
+        true,
+    );
+
+    // Choose a variable name to hold this function's covfun data.
+    // Functions that are used have a suffix ("u") to distinguish them from
+    // unused copies of the same function (from different CGUs), so that if a
+    // linker sees both it won't discard the used copy's data.
+    let u = if is_used { "u" } else { "" };
+    let covfun_var_name = CString::new(format!("__covrec_{func_name_hash:X}{u}")).unwrap();
+    debug!("function record var name: {covfun_var_name:?}");
+
+    let covfun_global = llvm::add_global(cx.llmod, cx.val_ty(covfun_record), &covfun_var_name);
+    llvm::set_initializer(covfun_global, covfun_record);
+    llvm::set_global_constant(covfun_global, true);
+    llvm::set_linkage(covfun_global, llvm::Linkage::LinkOnceODRLinkage);
+    llvm::set_visibility(covfun_global, llvm::Visibility::Hidden);
+    llvm::set_section(covfun_global, cx.covfun_section_name());
+    // LLVM's coverage mapping format specifies 8-byte alignment for items in this section.
+    // <https://llvm.org/docs/CoverageMappingFormat.html>
+    llvm::set_alignment(covfun_global, Align::EIGHT);
+    if cx.target_spec().supports_comdat() {
+        llvm::set_comdat(cx.llmod, covfun_global, &covfun_var_name);
+    }
+
+    cx.add_used_global(covfun_global);
+}
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
index 4a7721879fd..6d1d91340c2 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
@@ -77,8 +77,10 @@ fn ensure_non_empty_span(
 
     source_map
         .span_to_source(span, |src, start, end| try {
-            // We're only checking for specific ASCII characters, so we don't
-            // have to worry about multi-byte code points.
+            // Adjusting span endpoints by `BytePos(1)` is normally a bug,
+            // but in this case we have specifically checked that the character
+            // we're skipping over is one of two specific ASCII characters, so
+            // adjusting by exactly 1 byte is correct.
             if try_next && src.as_bytes()[end] == b'{' {
                 Some(span.with_hi(hi + BytePos(1)))
             } else if try_prev && src.as_bytes()[start - 1] == b'}' {
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
index bf773cd2667..7311cd9d230 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
@@ -5,7 +5,7 @@ use rustc_abi::Size;
 use rustc_codegen_ssa::traits::{
     BuilderMethods, ConstCodegenMethods, CoverageInfoBuilderMethods, MiscCodegenMethods,
 };
-use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
+use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
 use rustc_middle::mir::coverage::CoverageKind;
 use rustc_middle::ty::Instance;
 use rustc_middle::ty::layout::HasTyCtxt;
@@ -13,41 +13,32 @@ use tracing::{debug, instrument};
 
 use crate::builder::Builder;
 use crate::common::CodegenCx;
-use crate::coverageinfo::map_data::FunctionCoverageCollector;
 use crate::llvm;
 
 pub(crate) mod ffi;
 mod llvm_cov;
-pub(crate) mod map_data;
 mod mapgen;
 
-/// A context object for maintaining all state needed by the coverageinfo module.
-pub(crate) struct CrateCoverageContext<'ll, 'tcx> {
+/// Extra per-CGU context/state needed for coverage instrumentation.
+pub(crate) struct CguCoverageContext<'ll, 'tcx> {
     /// Coverage data for each instrumented function identified by DefId.
-    pub(crate) function_coverage_map:
-        RefCell<FxIndexMap<Instance<'tcx>, FunctionCoverageCollector<'tcx>>>,
+    pub(crate) instances_used: RefCell<FxIndexSet<Instance<'tcx>>>,
     pub(crate) pgo_func_name_var_map: RefCell<FxHashMap<Instance<'tcx>, &'ll llvm::Value>>,
     pub(crate) mcdc_condition_bitmap_map: RefCell<FxHashMap<Instance<'tcx>, Vec<&'ll llvm::Value>>>,
 
     covfun_section_name: OnceCell<CString>,
 }
 
-impl<'ll, 'tcx> CrateCoverageContext<'ll, 'tcx> {
+impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> {
     pub(crate) fn new() -> Self {
         Self {
-            function_coverage_map: Default::default(),
+            instances_used: RefCell::<FxIndexSet<_>>::default(),
             pgo_func_name_var_map: Default::default(),
             mcdc_condition_bitmap_map: Default::default(),
             covfun_section_name: Default::default(),
         }
     }
 
-    fn take_function_coverage_map(
-        &self,
-    ) -> FxIndexMap<Instance<'tcx>, FunctionCoverageCollector<'tcx>> {
-        self.function_coverage_map.replace(FxIndexMap::default())
-    }
-
     /// LLVM use a temp value to record evaluated mcdc test vector of each decision, which is
     /// called condition bitmap. In order to handle nested decisions, several condition bitmaps can
     /// be allocated for a function body. These values are named `mcdc.addr.{i}` and are a 32-bit
@@ -143,6 +134,13 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
 
         let bx = self;
 
+        // Due to LocalCopy instantiation or MIR inlining, coverage statements
+        // can end up in a crate that isn't doing coverage instrumentation.
+        // When that happens, we currently just discard those statements, so
+        // the corresponding code will be undercounted.
+        // FIXME(Zalathar): Find a better solution for mixed-coverage builds.
+        let Some(coverage_cx) = &bx.cx.coverage_cx else { return };
+
         let Some(function_coverage_info) =
             bx.tcx.instance_mir(instance.def).function_coverage_info.as_deref()
         else {
@@ -150,32 +148,22 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
             return;
         };
 
-        // FIXME(#132395): Unwrapping `coverage_cx` here has led to ICEs in the
-        // wild, so keep this early-return until we understand why.
-        let mut coverage_map = match bx.coverage_cx {
-            Some(ref cx) => cx.function_coverage_map.borrow_mut(),
-            None => return,
-        };
-        let func_coverage = coverage_map
-            .entry(instance)
-            .or_insert_with(|| FunctionCoverageCollector::new(instance, function_coverage_info));
+        // Mark the instance as used in this CGU, for coverage purposes.
+        // This includes functions that were not partitioned into this CGU,
+        // but were MIR-inlined into one of this CGU's functions.
+        coverage_cx.instances_used.borrow_mut().insert(instance);
 
         match *kind {
             CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!(
                 "marker statement {kind:?} should have been removed by CleanupPostBorrowck"
             ),
             CoverageKind::CounterIncrement { id } => {
-                func_coverage.mark_counter_id_seen(id);
-                // We need to explicitly drop the `RefMut` before calling into
-                // `instrprof_increment`, as that needs an exclusive borrow.
-                drop(coverage_map);
-
                 // The number of counters passed to `llvm.instrprof.increment` might
                 // be smaller than the number originally inserted by the instrumentor,
                 // if some high-numbered counters were removed by MIR optimizations.
                 // If so, LLVM's profiler runtime will use fewer physical counters.
                 let num_counters =
-                    bx.tcx().coverage_ids_info(instance.def).max_counter_id.as_u32() + 1;
+                    bx.tcx().coverage_ids_info(instance.def).num_counters_after_mir_opts();
                 assert!(
                     num_counters as usize <= function_coverage_info.num_counters,
                     "num_counters disagreement: query says {num_counters} but function info only has {}",
@@ -192,23 +180,23 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
                 );
                 bx.instrprof_increment(fn_name, hash, num_counters, index);
             }
-            CoverageKind::ExpressionUsed { id } => {
-                func_coverage.mark_expression_id_seen(id);
+            CoverageKind::ExpressionUsed { id: _ } => {
+                // Expression-used statements are markers that are handled by
+                // `coverage_ids_info`, so there's nothing to codegen here.
             }
             CoverageKind::CondBitmapUpdate { index, decision_depth } => {
-                drop(coverage_map);
-                let cond_bitmap = bx
-                    .coverage_cx()
+                let cond_bitmap = coverage_cx
                     .try_get_mcdc_condition_bitmap(&instance, decision_depth)
                     .expect("mcdc cond bitmap should have been allocated for updating");
                 let cond_index = bx.const_i32(index as i32);
                 bx.mcdc_condbitmap_update(cond_index, cond_bitmap);
             }
             CoverageKind::TestVectorBitmapUpdate { bitmap_idx, decision_depth } => {
-                drop(coverage_map);
-                let cond_bitmap = bx.coverage_cx()
-                                    .try_get_mcdc_condition_bitmap(&instance, decision_depth)
-                                    .expect("mcdc cond bitmap should have been allocated for merging into the global bitmap");
+                let cond_bitmap =
+                    coverage_cx.try_get_mcdc_condition_bitmap(&instance, decision_depth).expect(
+                        "mcdc cond bitmap should have been allocated for merging \
+                        into the global bitmap",
+                    );
                 assert!(
                     bitmap_idx as usize <= function_coverage_info.mcdc_bitmap_bits,
                     "bitmap index of the decision out of range"
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
index 07bd0f4d1c1..e545ce386ed 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
@@ -4,7 +4,7 @@ use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext};
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_index::Idx;
-use rustc_index::bit_set::BitSet;
+use rustc_index::bit_set::DenseBitSet;
 use rustc_middle::mir::{Body, SourceScope};
 use rustc_middle::ty::layout::{FnAbiOf, HasTypingEnv};
 use rustc_middle::ty::{self, Instance};
@@ -27,7 +27,7 @@ pub(crate) fn compute_mir_scopes<'ll, 'tcx>(
 ) {
     // Find all scopes with variables defined in them.
     let variables = if cx.sess().opts.debuginfo == DebugInfo::Full {
-        let mut vars = BitSet::new_empty(mir.source_scopes.len());
+        let mut vars = DenseBitSet::new_empty(mir.source_scopes.len());
         // FIXME(eddyb) take into account that arguments always have debuginfo,
         // irrespective of their name (assuming full debuginfo is enabled).
         // NOTE(eddyb) actually, on second thought, those are always in the
@@ -40,7 +40,7 @@ pub(crate) fn compute_mir_scopes<'ll, 'tcx>(
         // Nothing to emit, of course.
         None
     };
-    let mut instantiated = BitSet::new_empty(mir.source_scopes.len());
+    let mut instantiated = DenseBitSet::new_empty(mir.source_scopes.len());
     let mut discriminators = FxHashMap::default();
     // Instantiate all scopes.
     for idx in 0..mir.source_scopes.len() {
@@ -63,9 +63,9 @@ fn make_mir_scope<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     instance: Instance<'tcx>,
     mir: &Body<'tcx>,
-    variables: &Option<BitSet<SourceScope>>,
+    variables: &Option<DenseBitSet<SourceScope>>,
     debug_context: &mut FunctionDebugContext<'tcx, &'ll DIScope, &'ll DILocation>,
-    instantiated: &mut BitSet<SourceScope>,
+    instantiated: &mut DenseBitSet<SourceScope>,
     discriminators: &mut FxHashMap<BytePos, u32>,
     scope: SourceScope,
 ) {
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs b/compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs
new file mode 100644
index 00000000000..40842915222
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs
@@ -0,0 +1,37 @@
+//! Definitions of various DWARF-related constants.
+
+use libc::c_uint;
+
+/// Helper macro to let us redeclare gimli's constants as our own constants
+/// with a different type, with less risk of copy-paste errors.
+macro_rules! declare_constant {
+    (
+        $name:ident : $type:ty
+    ) => {
+        #[allow(non_upper_case_globals)]
+        pub(crate) const $name: $type = ::gimli::constants::$name.0 as $type;
+
+        // Assert that as-cast probably hasn't changed the value.
+        const _: () = assert!($name as i128 == ::gimli::constants::$name.0 as i128);
+    };
+}
+
+declare_constant!(DW_TAG_const_type: c_uint);
+
+// DWARF languages.
+declare_constant!(DW_LANG_Rust: c_uint);
+
+// DWARF attribute type encodings.
+declare_constant!(DW_ATE_boolean: c_uint);
+declare_constant!(DW_ATE_float: c_uint);
+declare_constant!(DW_ATE_signed: c_uint);
+declare_constant!(DW_ATE_unsigned: c_uint);
+declare_constant!(DW_ATE_UTF: c_uint);
+
+// DWARF expression operators.
+declare_constant!(DW_OP_deref: u64);
+declare_constant!(DW_OP_plus_uconst: u64);
+/// Defined by LLVM in `llvm/include/llvm/BinaryFormat/Dwarf.h`.
+/// Double-checked by a static assertion in `RustWrapper.cpp`.
+#[allow(non_upper_case_globals)]
+pub(crate) const DW_OP_LLVM_fragment: u64 = 0x1000;
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
index aef8642f199..2c9f1cda13a 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
@@ -7,7 +7,7 @@ use rustc_hir::def_id::LOCAL_CRATE;
 use rustc_middle::bug;
 use rustc_middle::middle::debugger_visualizer::DebuggerVisualizerType;
 use rustc_session::config::{CrateType, DebugInfo};
-use rustc_span::symbol::sym;
+use rustc_span::sym;
 
 use crate::builder::Builder;
 use crate::common::CodegenCx;
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index ef16e5bb459..88e43e1c678 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -16,13 +16,13 @@ use rustc_middle::ty::{
     self, AdtKind, CoroutineArgsExt, Instance, PolyExistentialTraitRef, Ty, TyCtxt, Visibility,
 };
 use rustc_session::config::{self, DebugInfo, Lto};
-use rustc_span::symbol::Symbol;
-use rustc_span::{DUMMY_SP, FileName, FileNameDisplayPreference, SourceFile, hygiene};
+use rustc_span::{DUMMY_SP, FileName, FileNameDisplayPreference, SourceFile, Symbol, hygiene};
 use rustc_symbol_mangling::typeid_for_trait_ref;
 use rustc_target::spec::DebuginfoKind;
 use smallvec::smallvec;
 use tracing::{debug, instrument};
 
+pub(crate) use self::type_map::TypeMap;
 use self::type_map::{DINodeCreationResult, Stub, UniqueTypeId};
 use super::CodegenUnitDebugContext;
 use super::namespace::mangled_name_of_instance;
@@ -31,6 +31,7 @@ use super::utils::{
     DIB, create_DIArray, debug_context, get_namespace_for_item, is_node_local_to_unit,
 };
 use crate::common::{AsCCharPtr, CodegenCx};
+use crate::debuginfo::dwarf_const;
 use crate::debuginfo::metadata::type_map::build_type_with_children;
 use crate::debuginfo::utils::{WidePtrKind, wide_pointer_kind};
 use crate::llvm::debuginfo::{
@@ -60,20 +61,6 @@ impl fmt::Debug for llvm::Metadata {
     }
 }
 
-// From DWARF 5.
-// See http://www.dwarfstd.org/ShowIssue.php?issue=140129.1.
-const DW_LANG_RUST: c_uint = 0x1c;
-#[allow(non_upper_case_globals)]
-const DW_ATE_boolean: c_uint = 0x02;
-#[allow(non_upper_case_globals)]
-const DW_ATE_float: c_uint = 0x04;
-#[allow(non_upper_case_globals)]
-const DW_ATE_signed: c_uint = 0x05;
-#[allow(non_upper_case_globals)]
-const DW_ATE_unsigned: c_uint = 0x07;
-#[allow(non_upper_case_globals)]
-const DW_ATE_UTF: c_uint = 0x10;
-
 pub(super) const UNKNOWN_LINE_NUMBER: c_uint = 0;
 pub(super) const UNKNOWN_COLUMN_NUMBER: c_uint = 0;
 
@@ -88,8 +75,6 @@ type SmallVec<T> = smallvec::SmallVec<[T; 16]>;
 mod enums;
 mod type_map;
 
-pub(crate) use type_map::TypeMap;
-
 /// Returns from the enclosing function if the type debuginfo node with the given
 /// unique ID can be found in the type map.
 macro_rules! return_if_di_node_created_in_meantime {
@@ -203,6 +188,7 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                     Stub::Struct,
                     unique_type_id,
                     &ptr_type_debuginfo_name,
+                    None,
                     cx.size_and_align_of(ptr_type),
                     NO_SCOPE_METADATA,
                     DIFlags::FlagZero,
@@ -259,6 +245,7 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                             layout.fields.offset(abi::WIDE_PTR_ADDR),
                             DIFlags::FlagZero,
                             data_ptr_type_di_node,
+                            None,
                         ),
                         build_field_di_node(
                             cx,
@@ -268,6 +255,7 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                             layout.fields.offset(abi::WIDE_PTR_EXTRA),
                             DIFlags::FlagZero,
                             type_di_node(cx, extra_field.ty),
+                            None,
                         ),
                     ]
                 },
@@ -369,6 +357,7 @@ fn build_dyn_type_di_node<'ll, 'tcx>(
                 Stub::Struct,
                 unique_type_id,
                 &type_name,
+                None,
                 cx.size_and_align_of(dyn_type),
                 NO_SCOPE_METADATA,
                 DIFlags::FlagZero,
@@ -467,8 +456,6 @@ pub(crate) fn type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, t: Ty<'tcx>) ->
             AdtKind::Enum => enums::build_enum_type_di_node(cx, unique_type_id),
         },
         ty::Tuple(_) => build_tuple_type_di_node(cx, unique_type_id),
-        // Type parameters from polymorphized functions.
-        ty::Param(_) => build_param_type_di_node(cx, t),
         _ => bug!("debuginfo: unexpected type in type_di_node(): {:?}", t),
     };
 
@@ -518,7 +505,7 @@ fn recursion_marker_type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> &'ll D
                 name.as_c_char_ptr(),
                 name.len(),
                 cx.tcx.data_layout.pointer_size.bits(),
-                DW_ATE_unsigned,
+                dwarf_const::DW_ATE_unsigned,
             )
         }
     })
@@ -722,6 +709,14 @@ fn build_cpp_f16_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> DINodeCreation
     // `f16`'s value to be displayed using a Natvis visualiser in `intrinsic.natvis`.
     let float_ty = cx.tcx.types.f16;
     let bits_ty = cx.tcx.types.u16;
+    let def_location = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        match float_ty.kind() {
+            ty::Adt(def, _) => Some(file_metadata_from_def_id(cx, Some(def.did()))),
+            _ => None,
+        }
+    } else {
+        None
+    };
     type_map::build_type_with_children(
         cx,
         type_map::stub(
@@ -729,12 +724,21 @@ fn build_cpp_f16_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> DINodeCreation
             Stub::Struct,
             UniqueTypeId::for_ty(cx.tcx, float_ty),
             "f16",
+            def_location,
             cx.size_and_align_of(float_ty),
             NO_SCOPE_METADATA,
             DIFlags::FlagZero,
         ),
         // Fields:
         |cx, float_di_node| {
+            let def_id = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+                match bits_ty.kind() {
+                    ty::Adt(def, _) => Some(def.did()),
+                    _ => None,
+                }
+            } else {
+                None
+            };
             smallvec![build_field_di_node(
                 cx,
                 float_di_node,
@@ -743,6 +747,7 @@ fn build_cpp_f16_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> DINodeCreation
                 Size::ZERO,
                 DIFlags::FlagZero,
                 type_di_node(cx, bits_ty),
+                def_id,
             )]
         },
         NO_GENERICS,
@@ -759,6 +764,8 @@ fn build_basic_type_di_node<'ll, 'tcx>(
     // .natvis visualizers (and perhaps other existing native debuggers?)
     let cpp_like_debuginfo = cpp_like_debuginfo(cx.tcx);
 
+    use dwarf_const::{DW_ATE_UTF, DW_ATE_boolean, DW_ATE_float, DW_ATE_signed, DW_ATE_unsigned};
+
     let (name, encoding) = match t.kind() {
         ty::Never => ("!", DW_ATE_unsigned),
         ty::Tuple(elements) if elements.is_empty() => {
@@ -839,6 +846,7 @@ fn build_foreign_type_di_node<'ll, 'tcx>(
             Stub::Struct,
             unique_type_id,
             &compute_debuginfo_type_name(cx.tcx, t, false),
+            None,
             cx.size_and_align_of(t),
             Some(get_namespace_for_item(cx, def_id)),
             DIFlags::FlagZero,
@@ -848,26 +856,6 @@ fn build_foreign_type_di_node<'ll, 'tcx>(
     )
 }
 
-fn build_param_type_di_node<'ll, 'tcx>(
-    cx: &CodegenCx<'ll, 'tcx>,
-    t: Ty<'tcx>,
-) -> DINodeCreationResult<'ll> {
-    debug!("build_param_type_di_node: {:?}", t);
-    let name = format!("{t:?}");
-    DINodeCreationResult {
-        di_node: unsafe {
-            llvm::LLVMRustDIBuilderCreateBasicType(
-                DIB(cx),
-                name.as_c_char_ptr(),
-                name.len(),
-                Size::ZERO.bits(),
-                DW_ATE_unsigned,
-            )
-        },
-        already_stored_in_typemap: false,
-    }
-}
-
 pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
     tcx: TyCtxt<'tcx>,
     codegen_unit_name: &str,
@@ -958,7 +946,7 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
 
         let unit_metadata = llvm::LLVMRustDIBuilderCreateCompileUnit(
             debug_context.builder,
-            DW_LANG_RUST,
+            dwarf_const::DW_LANG_Rust,
             compile_unit_file,
             producer.as_c_char_ptr(),
             producer.len(),
@@ -989,15 +977,22 @@ fn build_field_di_node<'ll, 'tcx>(
     offset: Size,
     flags: DIFlags,
     type_di_node: &'ll DIType,
+    def_id: Option<DefId>,
 ) -> &'ll DIType {
+    let (file_metadata, line_number) = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers
+    {
+        file_metadata_from_def_id(cx, def_id)
+    } else {
+        (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER)
+    };
     unsafe {
         llvm::LLVMRustDIBuilderCreateMemberType(
             DIB(cx),
             owner,
             name.as_c_char_ptr(),
             name.len(),
-            unknown_file_metadata(cx),
-            UNKNOWN_LINE_NUMBER,
+            file_metadata,
+            line_number,
             size_and_align.0.bits(),
             size_and_align.1.bits() as u32,
             offset.bits(),
@@ -1041,6 +1036,11 @@ fn build_struct_type_di_node<'ll, 'tcx>(
     let containing_scope = get_namespace_for_item(cx, adt_def.did());
     let struct_type_and_layout = cx.layout_of(struct_type);
     let variant_def = adt_def.non_enum_variant();
+    let def_location = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(file_metadata_from_def_id(cx, Some(adt_def.did())))
+    } else {
+        None
+    };
 
     type_map::build_type_with_children(
         cx,
@@ -1049,6 +1049,7 @@ fn build_struct_type_di_node<'ll, 'tcx>(
             Stub::Struct,
             unique_type_id,
             &compute_debuginfo_type_name(cx.tcx, struct_type, false),
+            def_location,
             size_and_align_of(struct_type_and_layout),
             Some(containing_scope),
             visibility_di_flags(cx, adt_def.did(), adt_def.did()),
@@ -1068,6 +1069,11 @@ fn build_struct_type_di_node<'ll, 'tcx>(
                         Cow::Borrowed(f.name.as_str())
                     };
                     let field_layout = struct_type_and_layout.field(cx, i);
+                    let def_id = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+                        Some(f.did)
+                    } else {
+                        None
+                    };
                     build_field_di_node(
                         cx,
                         owner,
@@ -1076,6 +1082,7 @@ fn build_struct_type_di_node<'ll, 'tcx>(
                         struct_type_and_layout.fields.offset(i),
                         visibility_di_flags(cx, f.did, adt_def.did()),
                         type_di_node(cx, field_layout.ty),
+                        def_id,
                     )
                 })
                 .collect()
@@ -1125,6 +1132,7 @@ fn build_upvar_field_di_nodes<'ll, 'tcx>(
                 layout.fields.offset(index),
                 DIFlags::FlagZero,
                 type_di_node(cx, up_var_ty),
+                None,
             )
         })
         .collect()
@@ -1150,6 +1158,7 @@ fn build_tuple_type_di_node<'ll, 'tcx>(
             Stub::Struct,
             unique_type_id,
             &type_name,
+            None,
             size_and_align_of(tuple_type_and_layout),
             NO_SCOPE_METADATA,
             DIFlags::FlagZero,
@@ -1168,6 +1177,7 @@ fn build_tuple_type_di_node<'ll, 'tcx>(
                         tuple_type_and_layout.fields.offset(index),
                         DIFlags::FlagZero,
                         type_di_node(cx, component_type),
+                        None,
                     )
                 })
                 .collect()
@@ -1189,6 +1199,12 @@ fn build_closure_env_di_node<'ll, 'tcx>(
     let containing_scope = get_namespace_for_item(cx, def_id);
     let type_name = compute_debuginfo_type_name(cx.tcx, closure_env_type, false);
 
+    let def_location = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(file_metadata_from_def_id(cx, Some(def_id)))
+    } else {
+        None
+    };
+
     type_map::build_type_with_children(
         cx,
         type_map::stub(
@@ -1196,6 +1212,7 @@ fn build_closure_env_di_node<'ll, 'tcx>(
             Stub::Struct,
             unique_type_id,
             &type_name,
+            def_location,
             cx.size_and_align_of(closure_env_type),
             Some(containing_scope),
             DIFlags::FlagZero,
@@ -1219,6 +1236,11 @@ fn build_union_type_di_node<'ll, 'tcx>(
     let containing_scope = get_namespace_for_item(cx, union_def_id);
     let union_ty_and_layout = cx.layout_of(union_type);
     let type_name = compute_debuginfo_type_name(cx.tcx, union_type, false);
+    let def_location = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(file_metadata_from_def_id(cx, Some(union_def_id)))
+    } else {
+        None
+    };
 
     type_map::build_type_with_children(
         cx,
@@ -1227,6 +1249,7 @@ fn build_union_type_di_node<'ll, 'tcx>(
             Stub::Union,
             unique_type_id,
             &type_name,
+            def_location,
             size_and_align_of(union_ty_and_layout),
             Some(containing_scope),
             DIFlags::FlagZero,
@@ -1239,6 +1262,11 @@ fn build_union_type_di_node<'ll, 'tcx>(
                 .enumerate()
                 .map(|(i, f)| {
                     let field_layout = union_ty_and_layout.field(cx, i);
+                    let def_id = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+                        Some(f.did)
+                    } else {
+                        None
+                    };
                     build_field_di_node(
                         cx,
                         owner,
@@ -1247,6 +1275,7 @@ fn build_union_type_di_node<'ll, 'tcx>(
                         Size::ZERO,
                         DIFlags::FlagZero,
                         type_di_node(cx, field_layout.ty),
+                        def_id,
                     )
                 })
                 .collect()
@@ -1321,14 +1350,7 @@ pub(crate) fn build_global_var_di_node<'ll>(
     // We may want to remove the namespace scope if we're in an extern block (see
     // https://github.com/rust-lang/rust/pull/46457#issuecomment-351750952).
     let var_scope = get_namespace_for_item(cx, def_id);
-    let span = hygiene::walk_chain_collapsed(tcx.def_span(def_id), DUMMY_SP);
-
-    let (file_metadata, line_number) = if !span.is_dummy() {
-        let loc = cx.lookup_debug_loc(span.lo());
-        (file_metadata(cx, &loc.file), loc.line)
-    } else {
-        (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER)
-    };
+    let (file_metadata, line_number) = file_metadata_from_def_id(cx, Some(def_id));
 
     let is_local_to_unit = is_node_local_to_unit(cx, def_id);
 
@@ -1418,6 +1440,7 @@ fn build_vtable_type_di_node<'ll, 'tcx>(
             Stub::VTableTy { vtable_holder },
             unique_type_id,
             &vtable_type_name,
+            None,
             (size, pointer_align),
             NO_SCOPE_METADATA,
             DIFlags::FlagArtificial,
@@ -1455,6 +1478,7 @@ fn build_vtable_type_di_node<'ll, 'tcx>(
                         field_offset,
                         DIFlags::FlagZero,
                         field_type_di_node,
+                        None,
                     ))
                 })
                 .collect()
@@ -1606,3 +1630,20 @@ fn tuple_field_name(field_index: usize) -> Cow<'static, str> {
         .map(|s| Cow::from(*s))
         .unwrap_or_else(|| Cow::from(format!("__{field_index}")))
 }
+
+pub(crate) type DefinitionLocation<'ll> = (&'ll DIFile, c_uint);
+
+/// Returns the file and line defining `def_id`; unknown file and line if `None` or span is dummy.
+pub(crate) fn file_metadata_from_def_id<'ll>(
+    cx: &CodegenCx<'ll, '_>,
+    def_id: Option<DefId>,
+) -> DefinitionLocation<'ll> {
+    let usable_span = def_id
+        .map(|did| hygiene::walk_chain_collapsed(cx.tcx.def_span(did), DUMMY_SP))
+        .filter(|span| !span.is_dummy());
+    let Some(span) = usable_span else {
+        return (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER);
+    };
+    let loc = cx.lookup_debug_loc(span.lo());
+    (file_metadata(cx, &loc.file), loc.line)
+}
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
index 100b046cee2..a72e205c9b2 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
@@ -4,7 +4,7 @@ use libc::c_uint;
 use rustc_abi::{Align, Endian, Size, TagEncoding, VariantIdx, Variants};
 use rustc_codegen_ssa::debuginfo::type_names::compute_debuginfo_type_name;
 use rustc_codegen_ssa::debuginfo::{tag_base_type, wants_c_like_enum_debuginfo};
-use rustc_codegen_ssa::traits::ConstCodegenMethods;
+use rustc_codegen_ssa::traits::{ConstCodegenMethods, MiscCodegenMethods};
 use rustc_index::IndexVec;
 use rustc_middle::bug;
 use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
@@ -12,12 +12,13 @@ use rustc_middle::ty::{self, AdtDef, CoroutineArgs, CoroutineArgsExt, Ty};
 use smallvec::smallvec;
 
 use crate::common::{AsCCharPtr, CodegenCx};
+use crate::debuginfo::dwarf_const::DW_TAG_const_type;
 use crate::debuginfo::metadata::enums::DiscrResult;
 use crate::debuginfo::metadata::type_map::{self, Stub, UniqueTypeId};
 use crate::debuginfo::metadata::{
     DINodeCreationResult, NO_GENERICS, NO_SCOPE_METADATA, SmallVec, UNKNOWN_LINE_NUMBER,
-    build_field_di_node, file_metadata, size_and_align_of, type_di_node, unknown_file_metadata,
-    visibility_di_flags,
+    build_field_di_node, file_metadata, file_metadata_from_def_id, size_and_align_of, type_di_node,
+    unknown_file_metadata, visibility_di_flags,
 };
 use crate::debuginfo::utils::DIB;
 use crate::llvm::debuginfo::{DIFile, DIFlags, DIType};
@@ -192,6 +193,12 @@ pub(super) fn build_enum_type_di_node<'ll, 'tcx>(
 
     assert!(!wants_c_like_enum_debuginfo(cx.tcx, enum_type_and_layout));
 
+    let def_location = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(file_metadata_from_def_id(cx, Some(enum_adt_def.did())))
+    } else {
+        None
+    };
+
     type_map::build_type_with_children(
         cx,
         type_map::stub(
@@ -199,27 +206,24 @@ pub(super) fn build_enum_type_di_node<'ll, 'tcx>(
             type_map::Stub::Union,
             unique_type_id,
             &enum_type_name,
+            def_location,
             cx.size_and_align_of(enum_type),
             NO_SCOPE_METADATA,
             visibility_di_flags(cx, enum_adt_def.did(), enum_adt_def.did()),
         ),
         |cx, enum_type_di_node| {
             match enum_type_and_layout.variants {
-                Variants::Single { index: variant_index } => {
-                    if enum_adt_def.variants().is_empty() {
-                        // Uninhabited enums have Variants::Single. We don't generate
-                        // any members for them.
-                        return smallvec![];
-                    }
-
-                    build_single_variant_union_fields(
-                        cx,
-                        enum_adt_def,
-                        enum_type_and_layout,
-                        enum_type_di_node,
-                        variant_index,
-                    )
+                Variants::Empty => {
+                    // We don't generate any members for uninhabited types.
+                    return smallvec![];
                 }
+                Variants::Single { index: variant_index } => build_single_variant_union_fields(
+                    cx,
+                    enum_adt_def,
+                    enum_type_and_layout,
+                    enum_type_di_node,
+                    variant_index,
+                ),
                 Variants::Multiple {
                     tag_encoding: TagEncoding::Direct,
                     ref variants,
@@ -262,6 +266,14 @@ pub(super) fn build_coroutine_di_node<'ll, 'tcx>(
     unique_type_id: UniqueTypeId<'tcx>,
 ) -> DINodeCreationResult<'ll> {
     let coroutine_type = unique_type_id.expect_ty();
+    let def_location = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        let &ty::Coroutine(coroutine_def_id, _) = coroutine_type.kind() else {
+            bug!("build_coroutine_di_node() called with non-coroutine type: `{:?}`", coroutine_type)
+        };
+        Some(file_metadata_from_def_id(cx, Some(coroutine_def_id)))
+    } else {
+        None
+    };
     let coroutine_type_and_layout = cx.layout_of(coroutine_type);
     let coroutine_type_name = compute_debuginfo_type_name(cx.tcx, coroutine_type, false);
 
@@ -274,6 +286,7 @@ pub(super) fn build_coroutine_di_node<'ll, 'tcx>(
             type_map::Stub::Union,
             unique_type_id,
             &coroutine_type_name,
+            def_location,
             size_and_align_of(coroutine_type_and_layout),
             NO_SCOPE_METADATA,
             DIFlags::FlagZero,
@@ -287,6 +300,7 @@ pub(super) fn build_coroutine_di_node<'ll, 'tcx>(
                 )
             }
             Variants::Single { .. }
+            | Variants::Empty
             | Variants::Multiple { tag_encoding: TagEncoding::Niche { .. }, .. } => {
                 bug!(
                     "Encountered coroutine with non-direct-tag layout: {:?}",
@@ -321,6 +335,12 @@ fn build_single_variant_union_fields<'ll, 'tcx>(
     let tag_base_type_di_node = type_di_node(cx, tag_base_type);
     let tag_base_type_align = cx.align_of(tag_base_type);
 
+    let enum_adt_def_id = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(enum_adt_def.did())
+    } else {
+        None
+    };
+
     let variant_names_type_di_node = build_variant_names_type_di_node(
         cx,
         enum_type_di_node,
@@ -328,6 +348,7 @@ fn build_single_variant_union_fields<'ll, 'tcx>(
             variant_index,
             Cow::from(enum_adt_def.variant(variant_index).name.as_str()),
         )),
+        enum_adt_def_id,
     );
 
     let variant_struct_type_wrapper_di_node = build_variant_struct_wrapper_type_di_node(
@@ -341,6 +362,7 @@ fn build_single_variant_union_fields<'ll, 'tcx>(
         tag_base_type_di_node,
         tag_base_type,
         DiscrResult::NoDiscriminant,
+        None,
     );
 
     smallvec![
@@ -354,6 +376,7 @@ fn build_single_variant_union_fields<'ll, 'tcx>(
             Size::ZERO,
             visibility_flags,
             variant_struct_type_wrapper_di_node,
+            None,
         ),
         unsafe {
             llvm::LLVMRustDIBuilderCreateStaticMemberType(
@@ -383,6 +406,12 @@ fn build_union_fields_for_enum<'ll, 'tcx>(
 ) -> SmallVec<&'ll DIType> {
     let tag_base_type = tag_base_type(cx.tcx, enum_type_and_layout);
 
+    let enum_adt_def_id = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(enum_adt_def.did())
+    } else {
+        None
+    };
+
     let variant_names_type_di_node = build_variant_names_type_di_node(
         cx,
         enum_type_di_node,
@@ -390,6 +419,7 @@ fn build_union_fields_for_enum<'ll, 'tcx>(
             let variant_name = Cow::from(enum_adt_def.variant(variant_index).name.as_str());
             (variant_index, variant_name)
         }),
+        enum_adt_def_id,
     );
     let visibility_flags = visibility_di_flags(cx, enum_adt_def.did(), enum_adt_def.did());
 
@@ -447,6 +477,7 @@ fn build_variant_names_type_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     containing_scope: &'ll DIType,
     variants: impl Iterator<Item = (VariantIdx, Cow<'tcx, str>)>,
+    enum_def_id: Option<rustc_span::def_id::DefId>,
 ) -> &'ll DIType {
     // Create an enumerator for each variant.
     super::build_enumeration_type_di_node(
@@ -454,6 +485,7 @@ fn build_variant_names_type_di_node<'ll, 'tcx>(
         "VariantNames",
         variant_names_enum_base_type(cx),
         variants.map(|(variant_index, variant_name)| (variant_name, variant_index.as_u32().into())),
+        enum_def_id,
         containing_scope,
     )
 }
@@ -469,6 +501,7 @@ fn build_variant_struct_wrapper_type_di_node<'ll, 'tcx>(
     tag_base_type_di_node: &'ll DIType,
     tag_base_type: Ty<'tcx>,
     discr: DiscrResult,
+    source_info: Option<(&'ll DIFile, c_uint)>,
 ) -> &'ll DIType {
     type_map::build_type_with_children(
         cx,
@@ -481,6 +514,7 @@ fn build_variant_struct_wrapper_type_di_node<'ll, 'tcx>(
                 variant_index,
             ),
             &variant_struct_wrapper_type_name(variant_index),
+            source_info,
             // NOTE: We use size and align of enum_type, not from variant_layout:
             size_and_align_of(enum_or_coroutine_type_and_layout),
             Some(enum_or_coroutine_type_di_node),
@@ -530,24 +564,42 @@ fn build_variant_struct_wrapper_type_di_node<'ll, 'tcx>(
                 Size::ZERO,
                 DIFlags::FlagZero,
                 variant_struct_type_di_node,
+                None,
             ));
 
-            let build_assoc_const =
-                |name: &str, type_di_node: &'ll DIType, value: u64, align: Align| unsafe {
-                    llvm::LLVMRustDIBuilderCreateStaticMemberType(
-                        DIB(cx),
-                        wrapper_struct_type_di_node,
-                        name.as_c_char_ptr(),
-                        name.len(),
-                        unknown_file_metadata(cx),
-                        UNKNOWN_LINE_NUMBER,
-                        type_di_node,
-                        DIFlags::FlagZero,
-                        Some(cx.const_u64(value)),
-                        align.bits() as u32,
-                    )
+            let build_assoc_const = |name: &str,
+                                     type_di_node_: &'ll DIType,
+                                     value: u64,
+                                     align: Align| unsafe {
+                // FIXME: Currently we force all `DISCR_*` values to be `u64`s, as LLDB seems to
+                // have problems inspecting other value types. Since `DISCR_*` is typically only
+                // inspected directly by the debugger visualizer, which compares it to the `tag`
+                // value (whose type is not modified at all), this shouldn't cause real problems.
+                let (t_di, align) = if name == ASSOC_CONST_DISCR_NAME {
+                    (type_di_node_, align.bits() as u32)
+                } else {
+                    let ty_u64 = Ty::new_uint(cx.tcx, ty::UintTy::U64);
+                    (type_di_node(cx, ty_u64), Align::EIGHT.bits() as u32)
                 };
 
+                // Wrap the type in a `const` modifier so that LLDB can inspect the member's value.
+                let field_type =
+                    llvm::LLVMRustDIBuilderCreateQualifiedType(DIB(cx), DW_TAG_const_type, t_di);
+
+                llvm::LLVMRustDIBuilderCreateStaticMemberType(
+                    DIB(cx),
+                    wrapper_struct_type_di_node,
+                    name.as_c_char_ptr(),
+                    name.len(),
+                    unknown_file_metadata(cx),
+                    UNKNOWN_LINE_NUMBER,
+                    field_type,
+                    DIFlags::FlagZero,
+                    Some(cx.const_u64(value)),
+                    align,
+                )
+            };
+
             // We also always have an associated constant for the discriminant value
             // of the variant.
             fields.push(build_assoc_const(
@@ -684,6 +736,11 @@ fn build_union_fields_for_direct_tag_coroutine<'ll, 'tcx>(
         variant_range
             .clone()
             .map(|variant_index| (variant_index, CoroutineArgs::variant_name(variant_index))),
+        if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+            Some(coroutine_def_id)
+        } else {
+            None
+        },
     );
 
     let discriminants: IndexVec<VariantIdx, DiscrResult> = {
@@ -776,6 +833,11 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             tag_base_type_di_node,
             tag_base_type,
             variant_member_info.discr,
+            if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+                variant_member_info.source_info
+            } else {
+                None
+            },
         );
 
         // We use LLVMRustDIBuilderCreateMemberType() member type directly because
@@ -831,6 +893,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             lo_offset,
             di_flags,
             type_di_node,
+            None,
         ));
 
         unions_fields.push(build_field_di_node(
@@ -841,6 +904,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             hi_offset,
             DIFlags::FlagZero,
             type_di_node,
+            None,
         ));
     } else {
         unions_fields.push(build_field_di_node(
@@ -851,6 +915,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             enum_type_and_layout.fields.offset(tag_field),
             di_flags,
             tag_base_type_di_node,
+            None,
         ));
     }
 
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
index b3d4a6642a1..9f6a5cc89e0 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
@@ -3,6 +3,7 @@ use std::borrow::Cow;
 use rustc_abi::{FieldIdx, TagEncoding, VariantIdx, Variants};
 use rustc_codegen_ssa::debuginfo::type_names::{compute_debuginfo_type_name, cpp_like_debuginfo};
 use rustc_codegen_ssa::debuginfo::{tag_base_type, wants_c_like_enum_debuginfo};
+use rustc_codegen_ssa::traits::MiscCodegenMethods;
 use rustc_hir::def::CtorKind;
 use rustc_index::IndexSlice;
 use rustc_middle::bug;
@@ -16,8 +17,8 @@ use super::{SmallVec, size_and_align_of};
 use crate::common::{AsCCharPtr, CodegenCx};
 use crate::debuginfo::metadata::type_map::{self, Stub};
 use crate::debuginfo::metadata::{
-    UNKNOWN_LINE_NUMBER, build_field_di_node, build_generic_type_param_di_nodes, type_di_node,
-    unknown_file_metadata,
+    UNKNOWN_LINE_NUMBER, build_field_di_node, build_generic_type_param_di_nodes,
+    file_metadata_from_def_id, type_di_node, unknown_file_metadata,
 };
 use crate::debuginfo::utils::{DIB, create_DIArray, get_namespace_for_item};
 use crate::llvm::debuginfo::{DIFlags, DIType};
@@ -68,6 +69,11 @@ fn build_c_style_enum_di_node<'ll, 'tcx>(
     enum_type_and_layout: TyAndLayout<'tcx>,
 ) -> DINodeCreationResult<'ll> {
     let containing_scope = get_namespace_for_item(cx, enum_adt_def.did());
+    let enum_adt_def_id = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(enum_adt_def.did())
+    } else {
+        None
+    };
     DINodeCreationResult {
         di_node: build_enumeration_type_di_node(
             cx,
@@ -77,6 +83,7 @@ fn build_c_style_enum_di_node<'ll, 'tcx>(
                 let name = Cow::from(enum_adt_def.variant(variant_index).name.as_str());
                 (name, discr.val)
             }),
+            enum_adt_def_id,
             containing_scope,
         ),
         already_stored_in_typemap: false,
@@ -92,6 +99,7 @@ fn build_enumeration_type_di_node<'ll, 'tcx>(
     type_name: &str,
     base_type: Ty<'tcx>,
     enumerators: impl Iterator<Item = (Cow<'tcx, str>, u128)>,
+    def_id: Option<rustc_span::def_id::DefId>,
     containing_scope: &'ll DIType,
 ) -> &'ll DIType {
     let is_unsigned = match base_type.kind() {
@@ -115,14 +123,21 @@ fn build_enumeration_type_di_node<'ll, 'tcx>(
         })
         .collect();
 
+    let (file_metadata, line_number) = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers
+    {
+        file_metadata_from_def_id(cx, def_id)
+    } else {
+        (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER)
+    };
+
     unsafe {
         llvm::LLVMRustDIBuilderCreateEnumerationType(
             DIB(cx),
             containing_scope,
             type_name.as_c_char_ptr(),
             type_name.len(),
-            unknown_file_metadata(cx),
-            UNKNOWN_LINE_NUMBER,
+            file_metadata,
+            line_number,
             size.bits(),
             align.bits() as u32,
             create_DIArray(DIB(cx), &enumerator_di_nodes[..]),
@@ -193,6 +208,12 @@ fn build_enum_variant_struct_type_di_node<'ll, 'tcx>(
 ) -> &'ll DIType {
     assert_eq!(variant_layout.ty, enum_type_and_layout.ty);
 
+    let def_location = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(file_metadata_from_def_id(cx, Some(variant_def.def_id)))
+    } else {
+        None
+    };
+
     type_map::build_type_with_children(
         cx,
         type_map::stub(
@@ -204,6 +225,7 @@ fn build_enum_variant_struct_type_di_node<'ll, 'tcx>(
                 variant_index,
             ),
             variant_def.name.as_str(),
+            def_location,
             // NOTE: We use size and align of enum_type, not from variant_layout:
             size_and_align_of(enum_type_and_layout),
             Some(enum_type_di_node),
@@ -231,6 +253,7 @@ fn build_enum_variant_struct_type_di_node<'ll, 'tcx>(
                         variant_layout.fields.offset(field_index),
                         di_flags,
                         type_di_node(cx, field_layout.ty),
+                        None,
                     )
                 })
                 .collect::<SmallVec<_>>()
@@ -286,6 +309,7 @@ fn build_coroutine_variant_struct_type_di_node<'ll, 'tcx>(
             Stub::Struct,
             unique_type_id,
             &variant_name,
+            None,
             size_and_align_of(coroutine_type_and_layout),
             Some(coroutine_type_di_node),
             DIFlags::FlagZero,
@@ -312,6 +336,7 @@ fn build_coroutine_variant_struct_type_di_node<'ll, 'tcx>(
                         variant_layout.fields.offset(field_index),
                         DIFlags::FlagZero,
                         type_di_node(cx, field_type),
+                        None,
                     )
                 })
                 .collect();
@@ -331,6 +356,7 @@ fn build_coroutine_variant_struct_type_di_node<'ll, 'tcx>(
                         coroutine_type_and_layout.fields.offset(index),
                         DIFlags::FlagZero,
                         type_di_node(cx, upvar_ty),
+                        None,
                     )
                 })
                 .collect();
@@ -366,7 +392,7 @@ fn compute_discriminant_value<'ll, 'tcx>(
     variant_index: VariantIdx,
 ) -> DiscrResult {
     match enum_type_and_layout.layout.variants() {
-        &Variants::Single { .. } => DiscrResult::NoDiscriminant,
+        &Variants::Single { .. } | &Variants::Empty => DiscrResult::NoDiscriminant,
         &Variants::Multiple { tag_encoding: TagEncoding::Direct, .. } => DiscrResult::Value(
             enum_type_and_layout.ty.discriminant_for_variant(cx.tcx, variant_index).unwrap().val,
         ),
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
index d4006691d37..11824398f24 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
@@ -4,7 +4,7 @@ use libc::c_uint;
 use rustc_abi::{Size, TagEncoding, VariantIdx, Variants};
 use rustc_codegen_ssa::debuginfo::type_names::compute_debuginfo_type_name;
 use rustc_codegen_ssa::debuginfo::{tag_base_type, wants_c_like_enum_debuginfo};
-use rustc_codegen_ssa::traits::ConstCodegenMethods;
+use rustc_codegen_ssa::traits::{ConstCodegenMethods, MiscCodegenMethods};
 use rustc_middle::bug;
 use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
 use rustc_middle::ty::{self};
@@ -14,7 +14,8 @@ use crate::common::{AsCCharPtr, CodegenCx};
 use crate::debuginfo::metadata::type_map::{self, Stub, StubInfo, UniqueTypeId};
 use crate::debuginfo::metadata::{
     DINodeCreationResult, NO_GENERICS, SmallVec, UNKNOWN_LINE_NUMBER, file_metadata,
-    size_and_align_of, type_di_node, unknown_file_metadata, visibility_di_flags,
+    file_metadata_from_def_id, size_and_align_of, type_di_node, unknown_file_metadata,
+    visibility_di_flags,
 };
 use crate::debuginfo::utils::{DIB, create_DIArray, get_namespace_for_item};
 use crate::llvm::debuginfo::{DIFile, DIFlags, DIType};
@@ -55,6 +56,12 @@ pub(super) fn build_enum_type_di_node<'ll, 'tcx>(
 
     assert!(!wants_c_like_enum_debuginfo(cx.tcx, enum_type_and_layout));
 
+    let def_location = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(file_metadata_from_def_id(cx, Some(enum_adt_def.did())))
+    } else {
+        None
+    };
+
     type_map::build_type_with_children(
         cx,
         type_map::stub(
@@ -62,6 +69,7 @@ pub(super) fn build_enum_type_di_node<'ll, 'tcx>(
             Stub::Struct,
             unique_type_id,
             &enum_type_name,
+            def_location,
             size_and_align_of(enum_type_and_layout),
             Some(containing_scope),
             visibility_flags,
@@ -84,14 +92,27 @@ pub(super) fn build_enum_type_di_node<'ll, 'tcx>(
                         enum_type_and_layout.for_variant(cx, variant_index),
                         visibility_flags,
                     ),
-                    source_info: None,
+                    source_info: if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+                        Some(file_metadata_from_def_id(
+                            cx,
+                            Some(enum_adt_def.variant(variant_index).def_id),
+                        ))
+                    } else {
+                        None
+                    },
                 })
                 .collect();
 
+            let enum_adt_def_id = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+                Some(enum_adt_def.did())
+            } else {
+                None
+            };
             smallvec![build_enum_variant_part_di_node(
                 cx,
                 enum_type_and_layout,
                 enum_type_di_node,
+                enum_adt_def_id,
                 &variant_member_infos[..],
             )]
         },
@@ -134,6 +155,12 @@ pub(super) fn build_coroutine_di_node<'ll, 'tcx>(
 
     let coroutine_type_name = compute_debuginfo_type_name(cx.tcx, coroutine_type, false);
 
+    let def_location = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+        Some(file_metadata_from_def_id(cx, Some(coroutine_def_id)))
+    } else {
+        None
+    };
+
     type_map::build_type_with_children(
         cx,
         type_map::stub(
@@ -141,6 +168,7 @@ pub(super) fn build_coroutine_di_node<'ll, 'tcx>(
             Stub::Struct,
             unique_type_id,
             &coroutine_type_name,
+            def_location,
             size_and_align_of(coroutine_type_and_layout),
             Some(containing_scope),
             DIFlags::FlagZero,
@@ -197,10 +225,16 @@ pub(super) fn build_coroutine_di_node<'ll, 'tcx>(
                 })
                 .collect();
 
+            let coroutine_def_id = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers {
+                Some(coroutine_def_id)
+            } else {
+                None
+            };
             smallvec![build_enum_variant_part_di_node(
                 cx,
                 coroutine_type_and_layout,
                 coroutine_type_di_node,
+                coroutine_def_id,
                 &variant_struct_type_di_nodes[..],
             )]
         },
@@ -228,6 +262,7 @@ fn build_enum_variant_part_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     enum_type_and_layout: TyAndLayout<'tcx>,
     enum_type_di_node: &'ll DIType,
+    enum_type_def_id: Option<rustc_span::def_id::DefId>,
     variant_member_infos: &[VariantMemberInfo<'_, 'll>],
 ) -> &'ll DIType {
     let tag_member_di_node =
@@ -236,6 +271,13 @@ fn build_enum_variant_part_di_node<'ll, 'tcx>(
     let variant_part_unique_type_id =
         UniqueTypeId::for_enum_variant_part(cx.tcx, enum_type_and_layout.ty);
 
+    let (file_metadata, line_number) = if cx.sess().opts.unstable_opts.debug_info_type_line_numbers
+    {
+        file_metadata_from_def_id(cx, enum_type_def_id)
+    } else {
+        (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER)
+    };
+
     let stub = StubInfo::new(
         cx,
         variant_part_unique_type_id,
@@ -246,8 +288,8 @@ fn build_enum_variant_part_di_node<'ll, 'tcx>(
                 enum_type_di_node,
                 variant_part_name.as_c_char_ptr(),
                 variant_part_name.len(),
-                unknown_file_metadata(cx),
-                UNKNOWN_LINE_NUMBER,
+                file_metadata,
+                line_number,
                 enum_type_and_layout.size.bits(),
                 enum_type_and_layout.align.abi.bits() as u32,
                 DIFlags::FlagZero,
@@ -316,8 +358,8 @@ fn build_discr_member_di_node<'ll, 'tcx>(
     let containing_scope = enum_or_coroutine_type_di_node;
 
     match enum_or_coroutine_type_and_layout.layout.variants() {
-        // A single-variant enum has no discriminant.
-        &Variants::Single { .. } => None,
+        // A single-variant or no-variant enum has no discriminant.
+        &Variants::Single { .. } | &Variants::Empty => None,
 
         &Variants::Multiple { tag_field, .. } => {
             let tag_base_type = tag_base_type(cx.tcx, enum_or_coroutine_type_and_layout);
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
index 4e461476040..a37e719d43f 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
@@ -8,7 +8,7 @@ use rustc_macros::HashStable;
 use rustc_middle::bug;
 use rustc_middle::ty::{self, PolyExistentialTraitRef, Ty, TyCtxt};
 
-use super::{SmallVec, UNKNOWN_LINE_NUMBER, unknown_file_metadata};
+use super::{DefinitionLocation, SmallVec, UNKNOWN_LINE_NUMBER, unknown_file_metadata};
 use crate::common::{AsCCharPtr, CodegenCx};
 use crate::debuginfo::utils::{DIB, create_DIArray, debug_context};
 use crate::llvm::debuginfo::{DIFlags, DIScope, DIType};
@@ -186,6 +186,7 @@ pub(super) fn stub<'ll, 'tcx>(
     kind: Stub<'ll>,
     unique_type_id: UniqueTypeId<'tcx>,
     name: &str,
+    def_location: Option<DefinitionLocation<'ll>>,
     (size, align): (Size, Align),
     containing_scope: Option<&'ll DIScope>,
     flags: DIFlags,
@@ -193,6 +194,12 @@ pub(super) fn stub<'ll, 'tcx>(
     let empty_array = create_DIArray(DIB(cx), &[]);
     let unique_type_id_str = unique_type_id.generate_unique_id_string(cx.tcx);
 
+    let (file_metadata, line_number) = if let Some(def_location) = def_location {
+        (def_location.0, def_location.1)
+    } else {
+        (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER)
+    };
+
     let metadata = match kind {
         Stub::Struct | Stub::VTableTy { .. } => {
             let vtable_holder = match kind {
@@ -205,8 +212,8 @@ pub(super) fn stub<'ll, 'tcx>(
                     containing_scope,
                     name.as_c_char_ptr(),
                     name.len(),
-                    unknown_file_metadata(cx),
-                    UNKNOWN_LINE_NUMBER,
+                    file_metadata,
+                    line_number,
                     size.bits(),
                     align.bits() as u32,
                     flags,
@@ -225,8 +232,8 @@ pub(super) fn stub<'ll, 'tcx>(
                 containing_scope,
                 name.as_c_char_ptr(),
                 name.len(),
-                unknown_file_metadata(cx),
-                UNKNOWN_LINE_NUMBER,
+                file_metadata,
+                line_number,
                 size.bits(),
                 align.bits() as u32,
                 flags,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index a8fdfbed592..755f4816acf 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -19,9 +19,8 @@ use rustc_middle::ty::layout::{HasTypingEnv, LayoutOf};
 use rustc_middle::ty::{self, GenericArgsRef, Instance, Ty, TypeVisitableExt};
 use rustc_session::Session;
 use rustc_session::config::{self, DebugInfo};
-use rustc_span::symbol::Symbol;
 use rustc_span::{
-    BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId,
+    BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId, Symbol,
 };
 use smallvec::SmallVec;
 use tracing::debug;
@@ -40,6 +39,7 @@ use crate::llvm::debuginfo::{
 use crate::value::Value;
 
 mod create_scope_map;
+mod dwarf_const;
 mod gdb;
 pub(crate) mod metadata;
 mod namespace;
@@ -48,6 +48,10 @@ mod utils;
 use self::create_scope_map::compute_mir_scopes;
 pub(crate) use self::metadata::build_global_var_di_node;
 
+// FIXME(Zalathar): These `DW_TAG_*` constants are fake values that were
+// removed from LLVM in 2015, and are only used by our own `RustWrapper.cpp`
+// to decide which C++ API to call. Instead, we should just have two separate
+// FFI functions and choose the correct one on the Rust side.
 #[allow(non_upper_case_globals)]
 const DW_TAG_auto_variable: c_uint = 0x100;
 #[allow(non_upper_case_globals)]
@@ -153,29 +157,26 @@ impl<'ll> DebugInfoBuilderMethods for Builder<'_, 'll, '_> {
         indirect_offsets: &[Size],
         fragment: Option<Range<Size>>,
     ) {
+        use dwarf_const::{DW_OP_LLVM_fragment, DW_OP_deref, DW_OP_plus_uconst};
+
         // Convert the direct and indirect offsets and fragment byte range to address ops.
-        // FIXME(eddyb) use `const`s instead of getting the values via FFI,
-        // the values should match the ones in the DWARF standard anyway.
-        let op_deref = || unsafe { llvm::LLVMRustDIBuilderCreateOpDeref() };
-        let op_plus_uconst = || unsafe { llvm::LLVMRustDIBuilderCreateOpPlusUconst() };
-        let op_llvm_fragment = || unsafe { llvm::LLVMRustDIBuilderCreateOpLLVMFragment() };
         let mut addr_ops = SmallVec::<[u64; 8]>::new();
 
         if direct_offset.bytes() > 0 {
-            addr_ops.push(op_plus_uconst());
+            addr_ops.push(DW_OP_plus_uconst);
             addr_ops.push(direct_offset.bytes() as u64);
         }
         for &offset in indirect_offsets {
-            addr_ops.push(op_deref());
+            addr_ops.push(DW_OP_deref);
             if offset.bytes() > 0 {
-                addr_ops.push(op_plus_uconst());
+                addr_ops.push(DW_OP_plus_uconst);
                 addr_ops.push(offset.bytes() as u64);
             }
         }
         if let Some(fragment) = fragment {
             // `DW_OP_LLVM_fragment` takes as arguments the fragment's
             // offset and size, both of them in bits.
-            addr_ops.push(op_llvm_fragment());
+            addr_ops.push(DW_OP_LLVM_fragment);
             addr_ops.push(fragment.start.bits() as u64);
             addr_ops.push((fragment.end - fragment.start).bits() as u64);
         }
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index d338c848754..3ec386f6b07 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -32,7 +32,7 @@ use crate::{attributes, llvm};
 ///
 /// If there’s a value with the same name already declared, the function will
 /// update the declaration and return existing Value instead.
-fn declare_raw_fn<'ll>(
+pub(crate) fn declare_raw_fn<'ll>(
     cx: &CodegenCx<'ll, '_>,
     name: &str,
     callconv: llvm::CallConv,
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index 3cdb5b971d9..f4c9491f758 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -37,6 +37,7 @@ pub(crate) struct UnstableCTargetFeature<'a> {
 #[note(codegen_llvm_forbidden_ctarget_feature_issue)]
 pub(crate) struct ForbiddenCTargetFeature<'a> {
     pub feature: &'a str,
+    pub enabled: &'a str,
     pub reason: &'a str,
 }
 
@@ -90,6 +91,11 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> {
 }
 
 #[derive(Diagnostic)]
+#[diag(codegen_llvm_autodiff_without_lto)]
+#[note]
+pub(crate) struct AutoDiffWithoutLTO;
+
+#[derive(Diagnostic)]
 #[diag(codegen_llvm_lto_disallowed)]
 pub(crate) struct LtoDisallowed;
 
@@ -131,6 +137,8 @@ pub enum LlvmError<'a> {
     PrepareThinLtoModule,
     #[diag(codegen_llvm_parse_bitcode)]
     ParseBitcode,
+    #[diag(codegen_llvm_prepare_autodiff)]
+    PrepareAutoDiff { src: String, target: String, error: String },
 }
 
 pub(crate) struct WithLlvmError<'a>(pub LlvmError<'a>, pub String);
@@ -152,6 +160,7 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for WithLlvmError<'_> {
             }
             PrepareThinLtoModule => fluent::codegen_llvm_prepare_thin_lto_module_with_llvm_err,
             ParseBitcode => fluent::codegen_llvm_parse_bitcode_with_llvm_err,
+            PrepareAutoDiff { .. } => fluent::codegen_llvm_prepare_autodiff_with_llvm_err,
         };
         self.0
             .into_diag(dcx, level)
@@ -206,12 +215,6 @@ pub(crate) struct MismatchedDataLayout<'a> {
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_llvm_invalid_target_feature_prefix)]
-pub(crate) struct InvalidTargetFeaturePrefix<'a> {
-    pub feature: &'a str,
-}
-
-#[derive(Diagnostic)]
 #[diag(codegen_llvm_fixed_x18_invalid_arch)]
 pub(crate) struct FixedX18InvalidArch<'a> {
     pub arch: &'a str,
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index da7f94e8cf7..cabcfc9b42b 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -340,6 +340,37 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     self.const_i32(cache_type),
                 ])
             }
+            sym::carrying_mul_add => {
+                let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx);
+
+                let wide_llty = self.type_ix(size.bits() * 2);
+                let args = args.as_array().unwrap();
+                let [a, b, c, d] = args.map(|a| self.intcast(a.immediate(), wide_llty, signed));
+
+                let wide = if signed {
+                    let prod = self.unchecked_smul(a, b);
+                    let acc = self.unchecked_sadd(prod, c);
+                    self.unchecked_sadd(acc, d)
+                } else {
+                    let prod = self.unchecked_umul(a, b);
+                    let acc = self.unchecked_uadd(prod, c);
+                    self.unchecked_uadd(acc, d)
+                };
+
+                let narrow_llty = self.type_ix(size.bits());
+                let low = self.trunc(wide, narrow_llty);
+                let bits_const = self.const_uint(wide_llty, size.bits());
+                // No need for ashr when signed; LLVM changes it to lshr anyway.
+                let high = self.lshr(wide, bits_const);
+                // FIXME: could be `trunc nuw`, even for signed.
+                let high = self.trunc(high, narrow_llty);
+
+                let pair_llty = self.type_struct(&[narrow_llty, narrow_llty], false);
+                let pair = self.const_poison(pair_llty);
+                let pair = self.insert_value(pair, low, 0);
+                let pair = self.insert_value(pair, high, 1);
+                pair
+            }
             sym::ctlz
             | sym::ctlz_nonzero
             | sym::cttz
@@ -352,84 +383,84 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             | sym::saturating_add
             | sym::saturating_sub => {
                 let ty = arg_tys[0];
-                match int_type_width_signed(ty, self) {
-                    Some((width, signed)) => match name {
-                        sym::ctlz | sym::cttz => {
-                            let y = self.const_bool(false);
-                            let ret = self.call_intrinsic(&format!("llvm.{name}.i{width}"), &[
-                                args[0].immediate(),
-                                y,
-                            ]);
-
-                            self.intcast(ret, llret_ty, false)
-                        }
-                        sym::ctlz_nonzero => {
-                            let y = self.const_bool(true);
-                            let llvm_name = &format!("llvm.ctlz.i{width}");
-                            let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
-                            self.intcast(ret, llret_ty, false)
-                        }
-                        sym::cttz_nonzero => {
-                            let y = self.const_bool(true);
-                            let llvm_name = &format!("llvm.cttz.i{width}");
-                            let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
-                            self.intcast(ret, llret_ty, false)
-                        }
-                        sym::ctpop => {
-                            let ret = self.call_intrinsic(&format!("llvm.ctpop.i{width}"), &[args
-                                [0]
-                            .immediate()]);
-                            self.intcast(ret, llret_ty, false)
-                        }
-                        sym::bswap => {
-                            if width == 8 {
-                                args[0].immediate() // byte swap a u8/i8 is just a no-op
-                            } else {
-                                self.call_intrinsic(&format!("llvm.bswap.i{width}"), &[
-                                    args[0].immediate()
-                                ])
-                            }
-                        }
-                        sym::bitreverse => self
-                            .call_intrinsic(&format!("llvm.bitreverse.i{width}"), &[
+                if !ty.is_integral() {
+                    tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
+                        span,
+                        name,
+                        ty,
+                    });
+                    return Ok(());
+                }
+                let (size, signed) = ty.int_size_and_signed(self.tcx);
+                let width = size.bits();
+                match name {
+                    sym::ctlz | sym::cttz => {
+                        let y = self.const_bool(false);
+                        let ret = self.call_intrinsic(&format!("llvm.{name}.i{width}"), &[
+                            args[0].immediate(),
+                            y,
+                        ]);
+
+                        self.intcast(ret, llret_ty, false)
+                    }
+                    sym::ctlz_nonzero => {
+                        let y = self.const_bool(true);
+                        let llvm_name = &format!("llvm.ctlz.i{width}");
+                        let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
+                        self.intcast(ret, llret_ty, false)
+                    }
+                    sym::cttz_nonzero => {
+                        let y = self.const_bool(true);
+                        let llvm_name = &format!("llvm.cttz.i{width}");
+                        let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
+                        self.intcast(ret, llret_ty, false)
+                    }
+                    sym::ctpop => {
+                        let ret = self.call_intrinsic(&format!("llvm.ctpop.i{width}"), &[
+                            args[0].immediate()
+                        ]);
+                        self.intcast(ret, llret_ty, false)
+                    }
+                    sym::bswap => {
+                        if width == 8 {
+                            args[0].immediate() // byte swap a u8/i8 is just a no-op
+                        } else {
+                            self.call_intrinsic(&format!("llvm.bswap.i{width}"), &[
                                 args[0].immediate()
-                            ]),
-                        sym::rotate_left | sym::rotate_right => {
-                            let is_left = name == sym::rotate_left;
-                            let val = args[0].immediate();
-                            let raw_shift = args[1].immediate();
-                            // rotate = funnel shift with first two args the same
-                            let llvm_name =
-                                &format!("llvm.fsh{}.i{}", if is_left { 'l' } else { 'r' }, width);
-
-                            // llvm expects shift to be the same type as the values, but rust
-                            // always uses `u32`.
-                            let raw_shift = self.intcast(raw_shift, self.val_ty(val), false);
-
-                            self.call_intrinsic(llvm_name, &[val, val, raw_shift])
+                            ])
                         }
-                        sym::saturating_add | sym::saturating_sub => {
-                            let is_add = name == sym::saturating_add;
-                            let lhs = args[0].immediate();
-                            let rhs = args[1].immediate();
-                            let llvm_name = &format!(
-                                "llvm.{}{}.sat.i{}",
-                                if signed { 's' } else { 'u' },
-                                if is_add { "add" } else { "sub" },
-                                width
-                            );
-                            self.call_intrinsic(llvm_name, &[lhs, rhs])
-                        }
-                        _ => bug!(),
-                    },
-                    None => {
-                        tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
-                            span,
-                            name,
-                            ty,
-                        });
-                        return Ok(());
                     }
+                    sym::bitreverse => self
+                        .call_intrinsic(&format!("llvm.bitreverse.i{width}"), &[
+                            args[0].immediate()
+                        ]),
+                    sym::rotate_left | sym::rotate_right => {
+                        let is_left = name == sym::rotate_left;
+                        let val = args[0].immediate();
+                        let raw_shift = args[1].immediate();
+                        // rotate = funnel shift with first two args the same
+                        let llvm_name =
+                            &format!("llvm.fsh{}.i{}", if is_left { 'l' } else { 'r' }, width);
+
+                        // llvm expects shift to be the same type as the values, but rust
+                        // always uses `u32`.
+                        let raw_shift = self.intcast(raw_shift, self.val_ty(val), false);
+
+                        self.call_intrinsic(llvm_name, &[val, val, raw_shift])
+                    }
+                    sym::saturating_add | sym::saturating_sub => {
+                        let is_add = name == sym::saturating_add;
+                        let lhs = args[0].immediate();
+                        let rhs = args[1].immediate();
+                        let llvm_name = &format!(
+                            "llvm.{}{}.sat.i{}",
+                            if signed { 's' } else { 'u' },
+                            if is_add { "add" } else { "sub" },
+                            width
+                        );
+                        self.call_intrinsic(llvm_name, &[lhs, rhs])
+                    }
+                    _ => bug!(),
                 }
             }
 
@@ -1534,6 +1565,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
+            sym::simd_relaxed_fma => ("fmuladd", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
             sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
             sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
             sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
@@ -1572,6 +1604,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             | sym::simd_fpowi
             | sym::simd_fsin
             | sym::simd_fsqrt
+            | sym::simd_relaxed_fma
             | sym::simd_round
             | sym::simd_trunc
     ) {
@@ -2529,19 +2562,3 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     span_bug!(span, "unknown SIMD intrinsic");
 }
-
-// Returns the width of an int Ty, and if it's signed or not
-// Returns None if the type is not an integer
-// FIXME: there’s multiple of this functions, investigate using some of the already existing
-// stuffs.
-fn int_type_width_signed(ty: Ty<'_>, cx: &CodegenCx<'_, '_>) -> Option<(u64, bool)> {
-    match ty.kind() {
-        ty::Int(t) => {
-            Some((t.bit_width().unwrap_or(u64::from(cx.tcx.sess.target.pointer_width)), true))
-        }
-        ty::Uint(t) => {
-            Some((t.bit_width().unwrap_or(u64::from(cx.tcx.sess.target.pointer_width)), false))
-        }
-        _ => None,
-    }
-}
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 9f398107fc6..06afe8bb3ad 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -17,6 +17,7 @@
 #![feature(iter_intersperse)]
 #![feature(let_chains)]
 #![feature(rustdoc_internals)]
+#![feature(slice_as_array)]
 #![feature(try_blocks)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end
@@ -27,9 +28,10 @@ use std::mem::ManuallyDrop;
 
 use back::owned_target_machine::OwnedTargetMachine;
 use back::write::{create_informational_target_machine, create_target_machine};
-use errors::ParseTargetMachineConfig;
-pub use llvm_util::target_features;
+use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig};
+pub use llvm_util::target_features_cfg;
 use rustc_ast::expand::allocator::AllocatorKind;
+use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
 use rustc_codegen_ssa::back::write::{
     CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn,
@@ -37,14 +39,14 @@ use rustc_codegen_ssa::back::write::{
 use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
 use rustc_data_structures::fx::FxIndexMap;
-use rustc_errors::{DiagCtxtHandle, ErrorGuaranteed, FatalError};
+use rustc_errors::{DiagCtxtHandle, FatalError};
 use rustc_metadata::EncodedMetadata;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
 use rustc_middle::ty::TyCtxt;
 use rustc_middle::util::Providers;
 use rustc_session::Session;
-use rustc_session::config::{OptLevel, OutputFilenames, PrintKind, PrintRequest};
-use rustc_span::symbol::Symbol;
+use rustc_session::config::{Lto, OptLevel, OutputFilenames, PrintKind, PrintRequest};
+use rustc_span::Symbol;
 
 mod back {
     pub(crate) mod archive;
@@ -232,6 +234,20 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
         (module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod()))
     }
+    /// Generate autodiff rules. Fails with `AutoDiffWithoutLTO` unless `cgcx.lto == Lto::Fat`.
+    fn autodiff(
+        cgcx: &CodegenContext<Self>,
+        tcx: TyCtxt<'_>,
+        module: &ModuleCodegen<Self::Module>,
+        diff_fncs: Vec<AutoDiffItem>,
+        config: &ModuleConfig,
+    ) -> Result<(), FatalError> {
+        if cgcx.lto != Lto::Fat {
+            let dcx = cgcx.create_dcx();
+            return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutLTO));
+        }
+        builder::autodiff::differentiate(module, cgcx, tcx, diff_fncs, config)
+    }
 }
 
 unsafe impl Send for LlvmCodegenBackend {} // Llvm is on a per-thread basis
@@ -331,8 +347,8 @@ impl CodegenBackend for LlvmCodegenBackend {
         llvm_util::print_version();
     }
 
-    fn target_features(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
-        target_features(sess, allow_unstable)
+    fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
+        target_features_cfg(sess, allow_unstable)
     }
 
     fn codegen_crate<'tcx>(
@@ -371,19 +387,14 @@ impl CodegenBackend for LlvmCodegenBackend {
         (codegen_results, work_products)
     }
 
-    fn link(
-        &self,
-        sess: &Session,
-        codegen_results: CodegenResults,
-        outputs: &OutputFilenames,
-    ) -> Result<(), ErrorGuaranteed> {
+    fn link(&self, sess: &Session, codegen_results: CodegenResults, outputs: &OutputFilenames) {
         use rustc_codegen_ssa::back::link::link_binary;
 
         use crate::back::archive::LlvmArchiveBuilderBuilder;
 
         // Run the linker on any artifacts that resulted from the LLVM run.
         // This should produce either a finished executable or library.
-        link_binary(sess, &LlvmArchiveBuilderBuilder, codegen_results, outputs)
+        link_binary(sess, &LlvmArchiveBuilderBuilder, codegen_results, outputs);
     }
 }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
index a4cb5a25d1b..11043b664f5 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
@@ -151,7 +151,7 @@ impl InlineAsmDiagnostic {
             unsafe { SrcMgrDiagnostic::unpack(super::LLVMRustGetSMDiagnostic(di, &mut cookie)) };
         InlineAsmDiagnostic {
             level: smdiag.level,
-            cookie: cookie.into(),
+            cookie,
             message: smdiag.message,
             source: smdiag.source,
         }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
new file mode 100644
index 00000000000..729d6f62e24
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -0,0 +1,29 @@
+#![allow(non_camel_case_types)]
+
+use libc::{c_char, c_uint};
+
+use super::ffi::{BasicBlock, Metadata, Module, Type, Value};
+use crate::llvm::Bool;
+extern "C" {
+    // Enzyme
+    pub fn LLVMRustHasMetadata(I: &Value, KindID: c_uint) -> bool;
+    pub fn LLVMRustEraseInstBefore(BB: &BasicBlock, I: &Value);
+    pub fn LLVMRustGetLastInstruction<'a>(BB: &BasicBlock) -> Option<&'a Value>;
+    pub fn LLVMRustDIGetInstMetadata(I: &Value) -> Option<&Metadata>;
+    pub fn LLVMRustEraseInstFromParent(V: &Value);
+    pub fn LLVMRustGetTerminator<'a>(B: &BasicBlock) -> &'a Value;
+    pub fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
+
+    pub fn LLVMGetFunctionCallConv(F: &Value) -> c_uint;
+    pub fn LLVMGetReturnType(T: &Type) -> &Type;
+    pub fn LLVMGetParams(Fnc: &Value, parms: *mut &Value);
+    pub fn LLVMGetNamedFunction(M: &Module, Name: *const c_char) -> Option<&Value>;
+}
+
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq)]
+pub enum LLVMRustVerifierFailureAction {
+    LLVMAbortProcessAction = 0,
+    LLVMPrintMessageAction = 1,
+    LLVMReturnStatusAction = 2,
+}
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 17b0ec4b936..cb4a8c9a5f2 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -17,7 +17,9 @@ use super::debuginfo::{
     DebugEmissionKind, DebugNameTableKind,
 };
 
-pub type Bool = c_uint;
+/// In the LLVM-C API, boolean values are passed as `typedef int LLVMBool`,
+/// which has a different ABI from Rust or C++ `bool`.
+pub type Bool = c_int;
 
 pub const True: Bool = 1 as Bool;
 pub const False: Bool = 0 as Bool;
@@ -54,25 +56,6 @@ pub enum LLVMRustResult {
     Failure,
 }
 
-// Rust version of the C struct with the same name in rustc_llvm/llvm-wrapper/RustWrapper.cpp.
-#[repr(C)]
-pub struct LLVMRustCOFFShortExport {
-    pub name: *const c_char,
-    pub ordinal_present: bool,
-    /// value of `ordinal` only important when `ordinal_present` is true
-    pub ordinal: u16,
-}
-
-impl LLVMRustCOFFShortExport {
-    pub fn new(name: *const c_char, ordinal: Option<u16>) -> LLVMRustCOFFShortExport {
-        LLVMRustCOFFShortExport {
-            name,
-            ordinal_present: ordinal.is_some(),
-            ordinal: ordinal.unwrap_or(0),
-        }
-    }
-}
-
 /// Translation of LLVM's MachineTypes enum, defined in llvm\include\llvm\BinaryFormat\COFF.h.
 ///
 /// We include only architectures supported on Windows.
@@ -116,7 +99,7 @@ pub enum ModuleFlagMergeBehavior {
 /// LLVM CallingConv::ID. Should we wrap this?
 ///
 /// See <https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/IR/CallingConv.h>
-#[derive(Copy, Clone, PartialEq, Debug)]
+#[derive(Copy, Clone, PartialEq, Debug, TryFromU32)]
 #[repr(C)]
 pub enum CallConv {
     CCallConv = 0,
@@ -543,7 +526,7 @@ pub struct SanitizerOptions {
     pub sanitize_kernel_address_recover: bool,
 }
 
-/// LLVMRelocMode
+/// LLVMRustRelocModel
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
 pub enum RelocModel {
@@ -555,6 +538,15 @@ pub enum RelocModel {
     ROPI_RWPI,
 }
 
+/// `#[repr(C)]` counterpart of `LLVMRustFloatABI`, passed over FFI as `FloatABIType`.
+#[derive(Copy, Clone, PartialEq)]
+#[repr(C)]
+pub enum FloatAbi {
+    Default,
+    Soft,
+    Hard,
+}
+
 /// LLVMRustCodeModel
 #[derive(Copy, Clone)]
 #[repr(C)]
@@ -2016,6 +2008,12 @@ unsafe extern "C" {
         AlignInBits: u32,
     ) -> &'a DIDerivedType;
 
+    pub fn LLVMRustDIBuilderCreateQualifiedType<'a>(
+        Builder: &DIBuilder<'a>,
+        Tag: c_uint,
+        Type: &'a DIType,
+    ) -> &'a DIDerivedType;
+
     pub fn LLVMRustDIBuilderCreateLexicalBlock<'a>(
         Builder: &DIBuilder<'a>,
         Scope: &'a DIScope,
@@ -2179,9 +2177,6 @@ unsafe extern "C" {
         Location: &'a DILocation,
         BD: c_uint,
     ) -> Option<&'a DILocation>;
-    pub fn LLVMRustDIBuilderCreateOpDeref() -> u64;
-    pub fn LLVMRustDIBuilderCreateOpPlusUconst() -> u64;
-    pub fn LLVMRustDIBuilderCreateOpLLVMFragment() -> u64;
 
     pub fn LLVMRustWriteTypeToString(Type: &Type, s: &RustString);
     pub fn LLVMRustWriteValueToString(value_ref: &Value, s: &RustString);
@@ -2209,7 +2204,7 @@ unsafe extern "C" {
         Model: CodeModel,
         Reloc: RelocModel,
         Level: CodeGenOptLevel,
-        UseSoftFP: bool,
+        FloatABIType: FloatAbi,
         FunctionSections: bool,
         DataSections: bool,
         UniqueSectionNames: bool,
@@ -2240,6 +2235,7 @@ unsafe extern "C" {
         Output: *const c_char,
         DwoOutput: *const c_char,
         FileType: FileType,
+        VerifyIR: bool,
     ) -> LLVMRustResult;
     pub fn LLVMRustOptimize<'a>(
         M: &'a Module,
@@ -2316,7 +2312,7 @@ unsafe extern "C" {
 
     pub fn LLVMRustGetSMDiagnostic<'a>(
         DI: &'a DiagnosticInfo,
-        cookie_out: &mut c_uint,
+        cookie_out: &mut u64,
     ) -> &'a SMDiagnostic;
 
     pub fn LLVMRustUnpackSMDiagnostic(
@@ -2344,15 +2340,6 @@ unsafe extern "C" {
     ) -> &'a mut RustArchiveMember<'a>;
     pub fn LLVMRustArchiveMemberFree<'a>(Member: &'a mut RustArchiveMember<'a>);
 
-    pub fn LLVMRustWriteImportLibrary(
-        ImportName: *const c_char,
-        Path: *const c_char,
-        Exports: *const LLVMRustCOFFShortExport,
-        NumExports: usize,
-        Machine: u16,
-        MinGW: bool,
-    ) -> LLVMRustResult;
-
     pub fn LLVMRustSetDataLayoutFromTargetMachine<'a>(M: &'a Module, TM: &'a TargetMachine);
 
     pub fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock);
@@ -2387,7 +2374,7 @@ unsafe extern "C" {
         Data: &ThinLTOData,
         Module: &Module,
         Target: &TargetMachine,
-    ) -> bool;
+    );
     pub fn LLVMRustPrepareThinLTOResolveWeak(Data: &ThinLTOData, Module: &Module) -> bool;
     pub fn LLVMRustPrepareThinLTOInternalize(Data: &ThinLTOData, Module: &Module) -> bool;
     pub fn LLVMRustPrepareThinLTOImport(
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index 909afe35a17..2592a7df95c 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -22,8 +22,11 @@ use crate::common::AsCCharPtr;
 
 pub mod archive_ro;
 pub mod diagnostic;
+pub mod enzyme_ffi;
 mod ffi;
 
+pub use self::enzyme_ffi::*;
+
 impl LLVMRustResult {
     pub fn into_result(self) -> Result<(), ()> {
         match self {
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index db2b03d9aed..c3d7c217861 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -15,14 +15,14 @@ use rustc_fs_util::path_to_c_string;
 use rustc_middle::bug;
 use rustc_session::Session;
 use rustc_session::config::{PrintKind, PrintRequest};
-use rustc_span::symbol::Symbol;
+use rustc_span::Symbol;
 use rustc_target::spec::{MergeFunctions, PanicStrategy, SmallDataThresholdSupport};
-use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATURES, Stability};
+use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATURES};
 
 use crate::back::write::create_informational_target_machine;
 use crate::errors::{
-    FixedX18InvalidArch, ForbiddenCTargetFeature, InvalidTargetFeaturePrefix, PossibleFeature,
-    UnknownCTargetFeature, UnknownCTargetFeaturePrefix, UnstableCTargetFeature,
+    FixedX18InvalidArch, ForbiddenCTargetFeature, PossibleFeature, UnknownCTargetFeature,
+    UnknownCTargetFeaturePrefix, UnstableCTargetFeature,
 };
 use crate::llvm;
 
@@ -109,7 +109,10 @@ unsafe fn configure_llvm(sess: &Session) {
             add("-wasm-enable-eh", false);
         }
 
-        if sess.target.os == "emscripten" && sess.panic_strategy() == PanicStrategy::Unwind {
+        if sess.target.os == "emscripten"
+            && !sess.opts.unstable_opts.emscripten_wasm_eh
+            && sess.panic_strategy() == PanicStrategy::Unwind
+        {
             add("-enable-emscripten-cxx-exceptions", false);
         }
 
@@ -230,6 +233,8 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         "aarch64"
     } else if sess.target.arch == "sparc64" {
         "sparc"
+    } else if sess.target.arch == "powerpc64" {
+        "powerpc"
     } else {
         &*sess.target.arch
     };
@@ -289,6 +294,7 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         // https://github.com/llvm/llvm-project/blob/llvmorg-18.1.0/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp#L26
         ("sparc", "v8plus") if get_version().0 == 19 => Some(LLVMFeature::new("v9")),
         ("sparc", "v8plus") if get_version().0 < 19 => None,
+        ("powerpc", "power8-crypto") => Some(LLVMFeature::new("crypto")),
         (_, s) => Some(LLVMFeature::new(s)),
     }
 }
@@ -297,7 +303,7 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
 /// Must express features in the way Rust understands them.
 ///
 /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
-pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
+pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
     let mut features: FxHashSet<Symbol> = Default::default();
 
     // Add base features for the target.
@@ -313,7 +319,7 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
         sess.target
             .rust_target_features()
             .iter()
-            .filter(|(_, gate, _)| gate.is_supported())
+            .filter(|(_, gate, _)| gate.in_cfg())
             .filter(|(feature, _, _)| {
                 // skip checking special features, as LLVM may not understand them
                 if RUSTC_SPECIAL_FEATURES.contains(feature) {
@@ -345,7 +351,16 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
     {
         if enabled {
             // Also add all transitively implied features.
-            features.extend(sess.target.implied_target_features(std::iter::once(feature)));
+
+            // We don't care about the order in `features` since the only thing we use it for is the
+            // `features.contains` below.
+            #[allow(rustc::potential_query_instability)]
+            features.extend(
+                sess.target
+                    .implied_target_features(std::iter::once(feature.as_str()))
+                    .iter()
+                    .map(|s| Symbol::intern(s)),
+            );
         } else {
             // Remove transitively reverse-implied features.
 
@@ -353,7 +368,11 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
             // `features.contains` below.
             #[allow(rustc::potential_query_instability)]
             features.retain(|f| {
-                if sess.target.implied_target_features(std::iter::once(*f)).contains(&feature) {
+                if sess
+                    .target
+                    .implied_target_features(std::iter::once(f.as_str()))
+                    .contains(&feature.as_str())
+                {
                     // If `f` if implies `feature`, then `!feature` implies `!f`, so we have to
                     // remove `f`. (This is the standard logical contraposition principle.)
                     false
@@ -369,10 +388,10 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
     sess.target
         .rust_target_features()
         .iter()
-        .filter(|(_, gate, _)| gate.is_supported())
-        .filter_map(|&(feature, gate, _)| {
-            if sess.is_nightly_build() || allow_unstable || gate.is_stable() {
-                Some(feature)
+        .filter(|(_, gate, _)| gate.in_cfg())
+        .filter_map(|(feature, gate, _)| {
+            if sess.is_nightly_build() || allow_unstable || gate.requires_nightly().is_none() {
+                Some(*feature)
             } else {
                 None
             }
@@ -490,7 +509,7 @@ fn print_target_features(sess: &Session, tm: &llvm::TargetMachine, out: &mut Str
         .rust_target_features()
         .iter()
         .filter_map(|(feature, gate, _implied)| {
-            if !gate.is_supported() {
+            if !gate.in_cfg() {
                 // Only list (experimentally) supported features.
                 return None;
             }
@@ -635,7 +654,7 @@ pub(crate) fn global_llvm_features(
         sess.target
             .features
             .split(',')
-            .filter(|v| !v.is_empty() && backend_feature_name(sess, v).is_some())
+            .filter(|v| !v.is_empty())
             // Drop +v8plus feature introduced in LLVM 20.
             .filter(|v| *v != "+v8plus" || get_version() >= (20, 0, 0))
             .map(String::from),
@@ -648,85 +667,136 @@ pub(crate) fn global_llvm_features(
     // -Ctarget-features
     if !only_base_features {
         let known_features = sess.target.rust_target_features();
+        // Will only be filled when `diagnostics` is set!
         let mut featsmap = FxHashMap::default();
 
-        // insert implied features
+        // Ensure that all ABI-required features are enabled, and the ABI-forbidden ones
+        // are disabled.
+        let abi_feature_constraints = sess.target.abi_required_features();
+        let abi_incompatible_set =
+            FxHashSet::from_iter(abi_feature_constraints.incompatible.iter().copied());
+
+        // Compute implied features
         let mut all_rust_features = vec![];
         for feature in sess.opts.cg.target_feature.split(',') {
-            match feature.strip_prefix('+') {
-                Some(feature) => all_rust_features.extend(
-                    UnordSet::from(
-                        sess.target
-                            .implied_target_features(std::iter::once(Symbol::intern(feature))),
-                    )
-                    .to_sorted_stable_ord()
-                    .iter()
-                    .map(|s| format!("+{}", s.as_str())),
-                ),
-                _ => all_rust_features.push(feature.to_string()),
+            if let Some(feature) = feature.strip_prefix('+') {
+                all_rust_features.extend(
+                    UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
+                        .to_sorted_stable_ord()
+                        .iter()
+                        .map(|&&s| (true, s)),
+                )
+            } else if let Some(feature) = feature.strip_prefix('-') {
+                // FIXME: Why do we not remove implied features on "-" here?
+                // We do the equivalent above in `target_features_cfg`.
+                // See <https://github.com/rust-lang/rust/issues/134792>.
+                all_rust_features.push((false, feature));
+            } else if !feature.is_empty() {
+                if diagnostics {
+                    sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature });
+                }
             }
         }
+        // Remove features that are meant for rustc, not LLVM.
+        all_rust_features.retain(|(_, feature)| {
+            // Retain if it is not a rustc feature
+            !RUSTC_SPECIFIC_FEATURES.contains(feature)
+        });
 
-        let feats = all_rust_features
-            .iter()
-            .filter_map(|s| {
-                let enable_disable = match s.chars().next() {
-                    None => return None,
-                    Some(c @ ('+' | '-')) => c,
-                    Some(_) => {
-                        if diagnostics {
-                            sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature: s });
-                        }
-                        return None;
-                    }
-                };
-
-                // Get the backend feature name, if any.
-                // This excludes rustc-specific features, which do not get passed to LLVM.
-                let feature = backend_feature_name(sess, s)?;
-                // Warn against use of LLVM specific feature names and unstable features on the CLI.
-                if diagnostics {
-                    let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature);
-                    match feature_state {
-                        None => {
-                            let rust_feature =
-                                known_features.iter().find_map(|&(rust_feature, _, _)| {
-                                    let llvm_features = to_llvm_features(sess, rust_feature)?;
-                                    if llvm_features.contains(feature)
-                                        && !llvm_features.contains(rust_feature)
-                                    {
-                                        Some(rust_feature)
-                                    } else {
-                                        None
-                                    }
-                                });
-                            let unknown_feature = if let Some(rust_feature) = rust_feature {
-                                UnknownCTargetFeature {
-                                    feature,
-                                    rust_feature: PossibleFeature::Some { rust_feature },
+        // Check feature validity.
+        if diagnostics {
+            for &(enable, feature) in &all_rust_features {
+                let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature);
+                match feature_state {
+                    None => {
+                        let rust_feature =
+                            known_features.iter().find_map(|&(rust_feature, _, _)| {
+                                let llvm_features = to_llvm_features(sess, rust_feature)?;
+                                if llvm_features.contains(feature)
+                                    && !llvm_features.contains(rust_feature)
+                                {
+                                    Some(rust_feature)
+                                } else {
+                                    None
                                 }
-                            } else {
-                                UnknownCTargetFeature {
-                                    feature,
-                                    rust_feature: PossibleFeature::None,
-                                }
-                            };
-                            sess.dcx().emit_warn(unknown_feature);
-                        }
-                        Some((_, Stability::Stable, _)) => {}
-                        Some((_, Stability::Unstable(_), _)) => {
-                            // An unstable feature. Warn about using it.
+                            });
+                        let unknown_feature = if let Some(rust_feature) = rust_feature {
+                            UnknownCTargetFeature {
+                                feature,
+                                rust_feature: PossibleFeature::Some { rust_feature },
+                            }
+                        } else {
+                            UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None }
+                        };
+                        sess.dcx().emit_warn(unknown_feature);
+                    }
+                    Some((_, stability, _)) => {
+                        if let Err(reason) = stability.toggle_allowed() {
+                            sess.dcx().emit_warn(ForbiddenCTargetFeature {
+                                feature,
+                                enabled: if enable { "enabled" } else { "disabled" },
+                                reason,
+                            });
+                        } else if stability.requires_nightly().is_some() {
+                            // An unstable feature. Warn about using it. It makes little sense
+                            // to hard-error here since we just warn about fully unknown
+                            // features above.
                             sess.dcx().emit_warn(UnstableCTargetFeature { feature });
                         }
-                        Some((_, Stability::Forbidden { reason }, _)) => {
-                            sess.dcx().emit_warn(ForbiddenCTargetFeature { feature, reason });
-                        }
                     }
+                }
 
-                    // FIXME(nagisa): figure out how to not allocate a full hashset here.
-                    featsmap.insert(feature, enable_disable == '+');
+                // Ensure that the features we enable/disable are compatible with the ABI.
+                if enable {
+                    if abi_incompatible_set.contains(feature) {
+                        sess.dcx().emit_warn(ForbiddenCTargetFeature {
+                            feature,
+                            enabled: "enabled",
+                            reason: "this feature is incompatible with the target ABI",
+                        });
+                    }
+                } else {
+                    // FIXME: we have to request implied features here since
+                    // negative features do not handle implied features above.
+                    for &required in abi_feature_constraints.required.iter() {
+                        let implied =
+                            sess.target.implied_target_features(std::iter::once(required));
+                        if implied.contains(feature) {
+                            sess.dcx().emit_warn(ForbiddenCTargetFeature {
+                                feature,
+                                enabled: "disabled",
+                                reason: "this feature is required by the target ABI",
+                            });
+                        }
+                    }
                 }
 
+                // FIXME(nagisa): figure out how to not allocate a full hashset here.
+                featsmap.insert(feature, enable);
+            }
+        }
+
+        // To be sure the ABI-relevant features are all in the right state, we explicitly
+        // (un)set them here. This means if the target spec sets those features wrong,
+        // we will silently correct them rather than silently producing wrong code.
+        // (The target sanity check tries to catch this, but we can't know which features are
+        // enabled in LLVM by default so we can't be fully sure about that check.)
+        // We add these at the beginning of the list so that `-Ctarget-features` can
+        // still override it... that's unsound, but more compatible with past behavior.
+        all_rust_features.splice(
+            0..0,
+            abi_feature_constraints
+                .required
+                .iter()
+                .map(|&f| (true, f))
+                .chain(abi_feature_constraints.incompatible.iter().map(|&f| (false, f))),
+        );
+
+        // Translate this into LLVM features.
+        let feats = all_rust_features
+            .iter()
+            .filter_map(|&(enable, feature)| {
+                let enable_disable = if enable { '+' } else { '-' };
                 // We run through `to_llvm_features` when
                 // passing requests down to LLVM. This means that all in-language
                 // features also work on the command line instead of having two
@@ -739,9 +809,9 @@ pub(crate) fn global_llvm_features(
                         enable_disable, llvm_feature.llvm_feature_name
                     ))
                     .chain(llvm_feature.dependency.into_iter().filter_map(
-                        move |feat| match (enable_disable, feat) {
-                            ('-' | '+', TargetFeatureFoldStrength::Both(f))
-                            | ('+', TargetFeatureFoldStrength::EnableOnly(f)) => {
+                        move |feat| match (enable, feat) {
+                            (_, TargetFeatureFoldStrength::Both(f))
+                            | (true, TargetFeatureFoldStrength::EnableOnly(f)) => {
                                 Some(format!("{enable_disable}{f}"))
                             }
                             _ => None,
@@ -773,22 +843,6 @@ pub(crate) fn global_llvm_features(
     features
 }
 
-/// Returns a feature name for the given `+feature` or `-feature` string.
-///
-/// Only allows features that are backend specific (i.e. not [`RUSTC_SPECIFIC_FEATURES`].)
-fn backend_feature_name<'a>(sess: &Session, s: &'a str) -> Option<&'a str> {
-    // features must start with a `+` or `-`.
-    let feature = s
-        .strip_prefix(&['+', '-'][..])
-        .unwrap_or_else(|| sess.dcx().emit_fatal(InvalidTargetFeaturePrefix { feature: s }));
-    // Rustc-specific feature requests like `+crt-static` or `-crt-static`
-    // are not passed down to LLVM.
-    if s.is_empty() || RUSTC_SPECIFIC_FEATURES.contains(&feature) {
-        return None;
-    }
-    Some(feature)
-}
-
 pub(crate) fn tune_cpu(sess: &Session) -> Option<&str> {
     let name = sess.opts.unstable_opts.tune_cpu.as_ref()?;
     Some(handle_native(name))
diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs
index 2b05e24a7ba..b0b6da869da 100644
--- a/compiler/rustc_codegen_llvm/src/type_of.rs
+++ b/compiler/rustc_codegen_llvm/src/type_of.rs
@@ -38,7 +38,7 @@ fn uncached_llvm_type<'a, 'tcx>(
             if let (&ty::Adt(def, _), &Variants::Single { index }) =
                 (layout.ty.kind(), &layout.variants)
             {
-                if def.is_enum() && !def.variants().is_empty() {
+                if def.is_enum() {
                     write!(&mut name, "::{}", def.variant(index).name).unwrap();
                 }
             }
diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
index e4c3e748cb5..8baa69cefe1 100644
--- a/compiler/rustc_codegen_llvm/src/va_arg.rs
+++ b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -10,6 +10,15 @@ use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
 
+fn round_up_to_alignment<'ll>(
+    bx: &mut Builder<'_, 'll, '_>,
+    value: &'ll Value,
+    align: Align,
+) -> &'ll Value {
+    let bumped = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
+    bx.and(bumped, bx.cx().const_i32(-(align.bytes() as i32)))
+}
+
 fn round_pointer_up_to_alignment<'ll>(
     bx: &mut Builder<'_, 'll, '_>,
     addr: &'ll Value,
@@ -17,8 +26,7 @@ fn round_pointer_up_to_alignment<'ll>(
     ptr_ty: &'ll Type,
 ) -> &'ll Value {
     let mut ptr_as_int = bx.ptrtoint(addr, bx.cx().type_isize());
-    ptr_as_int = bx.add(ptr_as_int, bx.cx().const_i32(align.bytes() as i32 - 1));
-    ptr_as_int = bx.and(ptr_as_int, bx.cx().const_i32(-(align.bytes() as i32)));
+    ptr_as_int = round_up_to_alignment(bx, ptr_as_int, align);
     bx.inttoptr(ptr_as_int, ptr_ty)
 }
 
@@ -270,6 +278,106 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     bx.load(val_type, val_addr, layout.align.abi)
 }
 
+fn emit_xtensa_va_arg<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    list: OperandRef<'tcx, &'ll Value>,
+    target_ty: Ty<'tcx>,
+) -> &'ll Value {
+    // Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
+    // this, other than "what GCC does".
+    //
+    // The va_list type has three fields:
+    // struct __va_list_tag {
+    //   int32_t *va_stk; // Arguments passed on the stack
+    //   int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
+    //   int32_t va_ndx; // Offset into the arguments, in bytes
+    // };
+    //
+    // The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
+    // Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
+    // otherwise it must come from va_stk.
+    //
+    // Primitive arguments are never split between registers and the stack. For example, if loading an 8 byte
+    // primitive value and va_ndx = 20, we instead bump the offset and read everything from va_stk.
+    let va_list_addr = list.immediate();
+    // FIXME: handle multi-field structs that split across regsave/stack?
+    let layout = bx.cx.layout_of(target_ty);
+    let from_stack = bx.append_sibling_block("va_arg.from_stack");
+    let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
+    let end = bx.append_sibling_block("va_arg.end");
+
+    // (*va).va_ndx
+    let va_reg_offset = 4;
+    let va_ndx_offset = va_reg_offset + 4;
+    let offset_ptr =
+        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);
+
+    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
+    let offset = round_up_to_alignment(bx, offset, layout.align.abi);
+
+    let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;
+
+    // End of this argument's slot; it becomes the new va_ndx when reading from the regsave area.
+    let offset_next = bx.add(offset, bx.const_i32(slot_size));
+
+    // Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
+    // If that is within the regsave area, then load from there. Otherwise load from the stack area.
+    let regsave_size = bx.const_i32(24);
+    let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
+    bx.cond_br(use_regsave, from_regsave, from_stack);
+
+    bx.switch_to_block(from_regsave);
+    // update va_ndx (an i32 field, so use the same i32 alignment as the load above)
+    bx.store(offset_next, offset_ptr, bx.tcx().data_layout.i32_align.abi);
+
+    // (*va).va_reg
+    let regsave_area_ptr =
+        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
+    let regsave_area =
+        bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
+    bx.br(end);
+
+    bx.switch_to_block(from_stack);
+
+    // The first time we switch from regsave to stack we need to adjust our offsets a bit.
+    // va_stk is set up such that the first stack argument is always at va_stk + 32.
+    // The corrected offset is written back into the va_list struct.
+
+    // let offset_corrected = cmp::max(offset, 32);
+    let stack_offset_start = bx.const_i32(32);
+    let needs_correction = bx.icmp(IntPredicate::IntULE, offset, stack_offset_start);
+    let offset_corrected = bx.select(needs_correction, stack_offset_start, offset);
+
+    // let offset_next_corrected = offset_corrected + slot_size;
+    // va_ndx = offset_next_corrected;
+    let offset_next_corrected = bx.add(offset_corrected, bx.const_i32(slot_size));
+    // update va_ndx; it must advance from the corrected offset so the next read follows this slot
+    bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.i32_align.abi);
+
+    // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
+    let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
+    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
+    bx.br(end);
+
+    bx.switch_to_block(end);
+
+    // On big-endian, for values smaller than the slot size we'd have to align the read to the end
+    // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
+    // targets supported by rustc are little-endian so don't worry about it.
+
+    // if from_regsave {
+    //     unsafe { *regsave_value_ptr }
+    // } else {
+    //     unsafe { *stack_value_ptr }
+    // }
+    assert!(bx.tcx().sess.target.endian == Endian::Little);
+    let value_ptr =
+        bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
+    bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi)
+}
+
 pub(super) fn emit_va_arg<'ll, 'tcx>(
     bx: &mut Builder<'_, 'll, 'tcx>,
     addr: OperandRef<'tcx, &'ll Value>,
@@ -302,6 +410,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
             let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
             emit_ptr_va_arg(bx, addr, target_ty, indirect, Align::from_bytes(8).unwrap(), false)
         }
+        "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
         // For all other architecture/OS combinations fall back to using
         // the LLVM va_arg instruction.
         // https://llvm.org/docs/LangRef.html#va-arg-instruction