34 files changed, 1287 insertions, 858 deletions
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index d1804cb49ad..3722d4350a2 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -442,6 +442,14 @@ impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
             );
         }
     }
+
+    fn mangled_name(&self, instance: Instance<'tcx>) -> String {
+        let llval = self.get_fn(instance);
+        llvm::build_string(|s| unsafe {
+            llvm::LLVMRustGetMangledName(llval, s);
+        })
+        .expect("symbol is not valid UTF-8")
+    }
 }
 
 pub(crate) fn inline_asm_call<'ll>(
@@ -504,14 +512,13 @@ pub(crate) fn inline_asm_call<'ll>(
             let key = "srcloc";
             let kind = llvm::LLVMGetMDKindIDInContext(
                 bx.llcx,
-                key.as_ptr() as *const c_char,
+                key.as_ptr().cast::<c_char>(),
                 key.len() as c_uint,
             );
 
-            // srcloc contains one integer for each line of assembly code.
-            // Unfortunately this isn't enough to encode a full span so instead
-            // we just encode the start position of each line.
-            // FIXME: Figure out a way to pass the entire line spans.
+            // `srcloc` contains one 64-bit integer for each line of assembly code,
+            // where the lower 32 bits hold the lo byte position and the upper 32 bits
+            // hold the hi byte position.
             let mut srcloc = vec![];
             if dia == llvm::AsmDialect::Intel && line_spans.len() > 1 {
                 // LLVM inserts an extra line to add the ".intel_syntax", so add
@@ -521,13 +528,13 @@ pub(crate) fn inline_asm_call<'ll>(
                 // due to the asm template string coming from a macro. LLVM will
                 // default to the first srcloc for lines that don't have an
                 // associated srcloc.
-                srcloc.push(llvm::LLVMValueAsMetadata(bx.const_i32(0)));
+                srcloc.push(llvm::LLVMValueAsMetadata(bx.const_u64(0)));
             }
-            srcloc.extend(
-                line_spans
-                    .iter()
-                    .map(|span| llvm::LLVMValueAsMetadata(bx.const_i32(span.lo().to_u32() as i32))),
-            );
+            srcloc.extend(line_spans.iter().map(|span| {
+                llvm::LLVMValueAsMetadata(bx.const_u64(
+                    u64::from(span.lo().to_u32()) | (u64::from(span.hi().to_u32()) << 32),
+                ))
+            }));
             let md = llvm::LLVMMDNodeInContext2(bx.llcx, srcloc.as_ptr(), srcloc.len());
             let md = llvm::LLVMMetadataAsValue(&bx.llcx, md);
             llvm::LLVMSetMetadata(call, kind, md);
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index cb958c1d4d7..f8454fd9960 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -1,6 +1,6 @@
 //! Set and unset common attributes on LLVM values.
 
-use rustc_attr::{InlineAttr, InstructionSetAttr, OptimizeAttr};
+use rustc_attr_parsing::{InlineAttr, InstructionSetAttr, OptimizeAttr};
 use rustc_codegen_ssa::traits::*;
 use rustc_hir::def_id::DefId;
 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, PatchableFunctionEntry};
@@ -395,17 +395,9 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         to_add.push(MemoryEffects::None.create_attr(cx.llcx));
     }
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
-        to_add.push(AttributeKind::Naked.create_attr(cx.llcx));
-        // HACK(jubilee): "indirect branch tracking" works by attaching prologues to functions.
-        // And it is a module-level attribute, so the alternative is pulling naked functions into
-        // new LLVM modules. Otherwise LLVM's "naked" functions come with endbr prefixes per
-        // https://github.com/rust-lang/rust/issues/98768
-        to_add.push(AttributeKind::NoCfCheck.create_attr(cx.llcx));
-        if llvm_util::get_version() < (19, 0, 0) {
-            // Prior to LLVM 19, branch-target-enforcement was disabled by setting the attribute to
-            // the string "false". Now it is disabled by absence of the attribute.
-            to_add.push(llvm::CreateAttrStringValue(cx.llcx, "branch-target-enforcement", "false"));
-        }
+        // do nothing; a naked function is converted into an extern function
+        // and a global assembly block. LLVM's support for naked functions is
+        // not used.
     } else {
         // Do not set sanitizer attributes for naked functions.
         to_add.extend(sanitize_attrs(cx, codegen_fn_attrs.no_sanitize));
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index f6d2ec24da6..4adf99e91d0 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -604,7 +604,14 @@ pub(crate) fn run_pass_manager(
     debug!("running the pass manager");
     let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
     let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
-    unsafe { write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) }?;
+
+    // If this rustc version was build with enzyme/autodiff enabled, and if users applied the
+    // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time.
+    let first_run = true;
+    debug!("running llvm pm opt pipeline");
+    unsafe {
+        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?;
+    }
     debug!("lto done");
     Ok(())
 }
diff --git a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
index 44c30d22a9e..4cbd49aa44d 100644
--- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
+++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
@@ -25,7 +25,7 @@ impl OwnedTargetMachine {
         model: llvm::CodeModel,
         reloc: llvm::RelocModel,
         level: llvm::CodeGenOptLevel,
-        use_soft_fp: bool,
+        float_abi: llvm::FloatAbi,
         function_sections: bool,
         data_sections: bool,
         unique_section_names: bool,
@@ -57,7 +57,7 @@ impl OwnedTargetMachine {
                 model,
                 reloc,
                 level,
-                use_soft_fp,
+                float_abi,
                 function_sections,
                 data_sections,
                 unique_section_names,
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index bde6668929c..509b24dd703 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -25,10 +25,9 @@ use rustc_session::Session;
 use rustc_session::config::{
     self, Lto, OutputType, Passes, RemapPathScopeComponents, SplitDwarfKind, SwitchWithOptPath,
 };
-use rustc_span::InnerSpan;
-use rustc_span::symbol::sym;
-use rustc_target::spec::{CodeModel, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
-use tracing::debug;
+use rustc_span::{BytePos, InnerSpan, Pos, SpanData, SyntaxContext, sym};
+use rustc_target::spec::{CodeModel, FloatAbi, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
+use tracing::{debug, trace};
 
 use crate::back::lto::ThinBuffer;
 use crate::back::owned_target_machine::OwnedTargetMachine;
@@ -182,6 +181,14 @@ pub(crate) fn to_llvm_code_model(code_model: Option<CodeModel>) -> llvm::CodeMod
     }
 }
 
+fn to_llvm_float_abi(float_abi: Option<FloatAbi>) -> llvm::FloatAbi {
+    match float_abi {
+        None => llvm::FloatAbi::Default,
+        Some(FloatAbi::Soft) => llvm::FloatAbi::Soft,
+        Some(FloatAbi::Hard) => llvm::FloatAbi::Hard,
+    }
+}
+
 pub(crate) fn target_machine_factory(
     sess: &Session,
     optlvl: config::OptLevel,
@@ -190,12 +197,12 @@ pub(crate) fn target_machine_factory(
     let reloc_model = to_llvm_relocation_model(sess.relocation_model());
 
     let (opt_level, _) = to_llvm_opt_settings(optlvl);
-    let use_softfp = if sess.target.arch == "arm" && sess.target.abi == "eabihf" {
-        sess.opts.cg.soft_float
+    let float_abi = if sess.target.arch == "arm" && sess.opts.cg.soft_float {
+        llvm::FloatAbi::Soft
     } else {
         // `validate_commandline_args_with_session_available` has already warned about this being
         // ignored. Let's make sure LLVM doesn't suddenly start using this flag on more targets.
-        false
+        to_llvm_float_abi(sess.target.llvm_floatabi)
     };
 
     let ffunction_sections =
@@ -291,7 +298,7 @@ pub(crate) fn target_machine_factory(
             code_model,
             reloc_model,
             opt_level,
-            use_softfp,
+            float_abi,
             ffunction_sections,
             fdata_sections,
             funique_section_names,
@@ -415,21 +422,32 @@ fn report_inline_asm(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     msg: String,
     level: llvm::DiagnosticLevel,
-    mut cookie: u64,
+    cookie: u64,
     source: Option<(String, Vec<InnerSpan>)>,
 ) {
     // In LTO build we may get srcloc values from other crates which are invalid
     // since they use a different source map. To be safe we just suppress these
     // in LTO builds.
-    if matches!(cgcx.lto, Lto::Fat | Lto::Thin) {
-        cookie = 0;
-    }
+    let span = if cookie == 0 || matches!(cgcx.lto, Lto::Fat | Lto::Thin) {
+        SpanData::default()
+    } else {
+        let lo = BytePos::from_u32(cookie as u32);
+        let hi = BytePos::from_u32((cookie >> 32) as u32);
+        SpanData {
+            lo,
+            // LLVM version < 19 silently truncates the cookie to 32 bits in some situations.
+            hi: if hi.to_u32() != 0 { hi } else { lo },
+            ctxt: SyntaxContext::root(),
+            parent: None,
+        }
+    };
     let level = match level {
         llvm::DiagnosticLevel::Error => Level::Error,
         llvm::DiagnosticLevel::Warning => Level::Warning,
         llvm::DiagnosticLevel::Note | llvm::DiagnosticLevel::Remark => Level::Note,
     };
-    cgcx.diag_emitter.inline_asm_error(cookie.try_into().unwrap(), msg, level, source);
+    let msg = msg.strip_prefix("error: ").unwrap_or(&msg).to_string();
+    cgcx.diag_emitter.inline_asm_error(span, msg, level, source);
 }
 
 unsafe extern "C" fn diagnostic_handler(info: &DiagnosticInfo, user: *mut c_void) {
@@ -519,9 +537,35 @@ pub(crate) unsafe fn llvm_optimize(
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
+    skip_size_increasing_opts: bool,
 ) -> Result<(), FatalError> {
-    let unroll_loops =
-        opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+    // Enzyme:
+    // The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized
+    // source code. However, benchmarks show that optimizations increasing the code size
+    // tend to reduce AD performance. Therefore deactivate them before AD, then differentiate the code
+    // and finally re-optimize the module, now with all optimizations available.
+    // FIXME(ZuseZ4): In a future update we could figure out how to only optimize individual functions getting
+    // differentiated.
+
+    let unroll_loops;
+    let vectorize_slp;
+    let vectorize_loop;
+
+    // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
+    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
+    // we should make this more granular, or at least check that the user has at least one autodiff
+    // call in their code, to justify altering the compilation pipeline.
+    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+        unroll_loops = false;
+        vectorize_slp = false;
+        vectorize_loop = false;
+    } else {
+        unroll_loops =
+            opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+        vectorize_slp = config.vectorize_slp;
+        vectorize_loop = config.vectorize_loop;
+    }
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
@@ -585,8 +629,8 @@ pub(crate) unsafe fn llvm_optimize(
             using_thin_buffers,
             config.merge_functions,
             unroll_loops,
-            config.vectorize_slp,
-            config.vectorize_loop,
+            vectorize_slp,
+            vectorize_loop,
             config.no_builtins,
             config.emit_lifetime_markers,
             sanitizer_options.as_ref(),
@@ -630,6 +674,8 @@ pub(crate) unsafe fn optimize(
         unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
     }
 
+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
     if let Some(opt_level) = config.opt_level {
         let opt_stage = match cgcx.lto {
             Lto::Fat => llvm::OptStage::PreLinkFatLTO,
@@ -637,7 +683,20 @@ pub(crate) unsafe fn optimize(
             _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
             _ => llvm::OptStage::PreLinkNoLTO,
         };
-        return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
+
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling
+        let skip_size_increasing_opts = cfg!(llvm_enzyme);
+        return unsafe {
+            llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )
+        };
     }
     Ok(())
 }
diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs
index f62310bd948..d05faf5577b 100644
--- a/compiler/rustc_codegen_llvm/src/base.rs
+++ b/compiler/rustc_codegen_llvm/src/base.rs
@@ -23,7 +23,7 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::mir::mono::{Linkage, Visibility};
 use rustc_middle::ty::TyCtxt;
 use rustc_session::config::DebugInfo;
-use rustc_span::symbol::Symbol;
+use rustc_span::Symbol;
 use rustc_target::spec::SanitizerSet;
 
 use super::ModuleLlvm;
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index b5bb7630ca6..5a34b52e6ef 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -2,6 +2,8 @@ use std::borrow::Cow;
 use std::ops::Deref;
 use std::{iter, ptr};
 
+pub(crate) mod autodiff;
+
 use libc::{c_char, c_uint};
 use rustc_abi as abi;
 use rustc_abi::{Align, Size, WrappingRange};
@@ -608,14 +610,6 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn range_metadata(&mut self, load: &'ll Value, range: WrappingRange) {
-        if self.sess().target.arch == "amdgpu" {
-            // amdgpu/LLVM does something weird and thinks an i64 value is
-            // split into a v2i32, halving the bitwidth LLVM expects,
-            // tripping an assertion. So, for now, just disable this
-            // optimization.
-            return;
-        }
-
         if self.cx.sess().opts.optimize == OptLevel::No {
             // Don't emit metadata we're not going to use
             return;
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
new file mode 100644
index 00000000000..38f7eaa090f
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -0,0 +1,344 @@
+use std::ptr;
+
+use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode};
+use rustc_codegen_ssa::ModuleCodegen;
+use rustc_codegen_ssa::back::write::ModuleConfig;
+use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods};
+use rustc_errors::FatalError;
+use rustc_middle::ty::TyCtxt;
+use rustc_session::config::Lto;
+use tracing::{debug, trace};
+
+use crate::back::write::{llvm_err, llvm_optimize};
+use crate::builder::Builder;
+use crate::declare::declare_raw_fn;
+use crate::errors::LlvmError;
+use crate::llvm::AttributePlace::Function;
+use crate::llvm::{Metadata, True};
+use crate::value::Value;
+use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, context, llvm};
+
+fn get_params(fnc: &Value) -> Vec<&Value> {
+    unsafe {
+        let param_num = llvm::LLVMCountParams(fnc) as usize;
+        let mut fnc_args: Vec<&Value> = vec![];
+        fnc_args.reserve(param_num);
+        llvm::LLVMGetParams(fnc, fnc_args.as_mut_ptr());
+        fnc_args.set_len(param_num);
+        fnc_args
+    }
+}
+
+/// When differentiating `fn_to_diff`, take a `outer_fn` and generate another
+/// function with expected naming and calling conventions[^1] which will be
+/// discovered by the enzyme LLVM pass and its body populated with the differentiated
+/// `fn_to_diff`. `outer_fn` is then modified to have a call to the generated
+/// function and handle the differences between the Rust calling convention and
+/// Enzyme.
+/// [^1]: <https://enzyme.mit.edu/getting_started/CallingConvention/>
+// FIXME(ZuseZ4): `outer_fn` should include upstream safety checks to
+// cover some assumptions of enzyme/autodiff, which could lead to UB otherwise.
+fn generate_enzyme_call<'ll, 'tcx>(
+    cx: &context::CodegenCx<'ll, 'tcx>,
+    fn_to_diff: &'ll Value,
+    outer_fn: &'ll Value,
+    attrs: AutoDiffAttrs,
+) {
+    let inputs = attrs.input_activity;
+    let output = attrs.ret_activity;
+
+    // We have to pick the name depending on whether we want forward or reverse mode autodiff.
+    // FIXME(ZuseZ4): The new pass based approach should not need the {Forward/Reverse}First method anymore, since
+    // it will handle higher-order derivatives correctly automatically (in theory). Currently
+    // higher-order derivatives fail, so we should debug that before adjusting this code.
+    let mut ad_name: String = match attrs.mode {
+        DiffMode::Forward => "__enzyme_fwddiff",
+        DiffMode::Reverse => "__enzyme_autodiff",
+        DiffMode::ForwardFirst => "__enzyme_fwddiff",
+        DiffMode::ReverseFirst => "__enzyme_autodiff",
+        _ => panic!("logic bug in autodiff, unrecognized mode"),
+    }
+    .to_string();
+
+    // add outer_fn name to ad_name to make it unique, in case users apply autodiff to multiple
+    // functions. Unwrap will only panic, if LLVM gave us an invalid string.
+    let name = llvm::get_value_name(outer_fn);
+    let outer_fn_name = std::ffi::CStr::from_bytes_with_nul(name).unwrap().to_str().unwrap();
+    ad_name.push_str(outer_fn_name.to_string().as_str());
+
+    // Let us assume the user wrote the following function square:
+    //
+    // ```llvm
+    // define double @square(double %x) {
+    // entry:
+    //  %0 = fmul double %x, %x
+    //  ret double %0
+    // }
+    // ```
+    //
+    // The user now applies autodiff to the function square, in which case fn_to_diff will be `square`.
+    // Our macro generates the following placeholder code (slightly simplified):
+    //
+    // ```llvm
+    // define double @dsquare(double %x) {
+    //  ; placeholder code
+    //  return 0.0;
+    // }
+    // ```
+    //
+    // so our `outer_fn` will be `dsquare`. The unsafe code section below now removes the placeholder
+    // code and inserts an autodiff call. We also add a declaration for the __enzyme_autodiff call.
+    // Again, the arguments to all functions are slightly simplified.
+    // ```llvm
+    // declare double @__enzyme_autodiff_square(...)
+    //
+    // define double @dsquare(double %x) {
+    // entry:
+    //   %0 = tail call double (...) @__enzyme_autodiff_square(double (double)* nonnull @square, double %x)
+    //   ret double %0
+    // }
+    // ```
+    unsafe {
+        // On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input
+        // arguments. We do however need to declare them with their correct return type.
+        // We already figured the correct return type out in our frontend, when generating the outer_fn,
+        // so we can now just go ahead and use that. FIXME(ZuseZ4): This doesn't handle sret yet.
+        let fn_ty = llvm::LLVMGlobalGetValueType(outer_fn);
+        let ret_ty = llvm::LLVMGetReturnType(fn_ty);
+
+        // LLVM can figure out the input types on it's own, so we take a shortcut here.
+        let enzyme_ty = llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True);
+
+        //FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and
+        // think a bit more about what should go here.
+        let cc = llvm::LLVMGetFunctionCallConv(outer_fn);
+        let ad_fn = declare_raw_fn(
+            cx,
+            &ad_name,
+            llvm::CallConv::try_from(cc).expect("invalid callconv"),
+            llvm::UnnamedAddr::No,
+            llvm::Visibility::Default,
+            enzyme_ty,
+        );
+
+        // Otherwise LLVM might inline our temporary code before the enzyme pass has a chance to
+        // do it's work.
+        let attr = llvm::AttributeKind::NoInline.create_attr(cx.llcx);
+        attributes::apply_to_llfn(ad_fn, Function, &[attr]);
+
+        // first, remove all calls from fnc
+        let entry = llvm::LLVMGetFirstBasicBlock(outer_fn);
+        let br = llvm::LLVMRustGetTerminator(entry);
+        llvm::LLVMRustEraseInstFromParent(br);
+
+        let last_inst = llvm::LLVMRustGetLastInstruction(entry).unwrap();
+        let mut builder = Builder::build(cx, entry);
+
+        let num_args = llvm::LLVMCountParams(&fn_to_diff);
+        let mut args = Vec::with_capacity(num_args as usize + 1);
+        args.push(fn_to_diff);
+
+        let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
+        let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
+        let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
+        let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
+        let enzyme_primal_ret = cx.create_metadata("enzyme_primal_return".to_string()).unwrap();
+
+        match output {
+            DiffActivity::Dual => {
+                args.push(cx.get_metadata_value(enzyme_primal_ret));
+            }
+            DiffActivity::Active => {
+                args.push(cx.get_metadata_value(enzyme_primal_ret));
+            }
+            _ => {}
+        }
+
+        trace!("matching autodiff arguments");
+        // We now handle the issue that Rust level arguments not always match the llvm-ir level
+        // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
+        // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
+        // need to match those.
+        // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
+        // using iterators and peek()?
+        let mut outer_pos: usize = 0;
+        let mut activity_pos = 0;
+        let outer_args: Vec<&llvm::Value> = get_params(outer_fn);
+        while activity_pos < inputs.len() {
+            let activity = inputs[activity_pos as usize];
+            // Duplicated arguments received a shadow argument, into which enzyme will write the
+            // gradient.
+            let (activity, duplicated): (&Metadata, bool) = match activity {
+                DiffActivity::None => panic!("not a valid input activity"),
+                DiffActivity::Const => (enzyme_const, false),
+                DiffActivity::Active => (enzyme_out, false),
+                DiffActivity::ActiveOnly => (enzyme_out, false),
+                DiffActivity::Dual => (enzyme_dup, true),
+                DiffActivity::DualOnly => (enzyme_dupnoneed, true),
+                DiffActivity::Duplicated => (enzyme_dup, true),
+                DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
+                DiffActivity::FakeActivitySize => (enzyme_const, false),
+            };
+            let outer_arg = outer_args[outer_pos];
+            args.push(cx.get_metadata_value(activity));
+            args.push(outer_arg);
+            if duplicated {
+                // We know that duplicated args by construction have a following argument,
+                // so this can not be out of bounds.
+                let next_outer_arg = outer_args[outer_pos + 1];
+                let next_outer_ty = cx.val_ty(next_outer_arg);
+                // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since
+                // vectors behind references (&Vec<T>) are already supported. Users can not pass a
+                // Vec by value for reverse mode, so this would only help forward mode autodiff.
+                let slice = {
+                    if activity_pos + 1 >= inputs.len() {
+                        // If there is no arg following our ptr, it also can't be a slice,
+                        // since that would lead to a ptr, int pair.
+                        false
+                    } else {
+                        let next_activity = inputs[activity_pos + 1];
+                        // We analyze the MIR types and add this dummy activity if we visit a slice.
+                        next_activity == DiffActivity::FakeActivitySize
+                    }
+                };
+                if slice {
+                    // A duplicated slice will have the following two outer_fn arguments:
+                    // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call:
+                    // (..., metadata! enzyme_dup, ptr, ptr, int1, ...).
+                    // FIXME(ZuseZ4): We will upstream a safety check later which asserts that
+                    // int2 >= int1, which means the shadow vector is large enough to store the gradient.
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer);
+                    let next_outer_arg2 = outer_args[outer_pos + 2];
+                    let next_outer_ty2 = cx.val_ty(next_outer_arg2);
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer);
+                    let next_outer_arg3 = outer_args[outer_pos + 3];
+                    let next_outer_ty3 = cx.val_ty(next_outer_arg3);
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer);
+                    args.push(next_outer_arg2);
+                    args.push(cx.get_metadata_value(enzyme_const));
+                    args.push(next_outer_arg);
+                    outer_pos += 4;
+                    activity_pos += 2;
+                } else {
+                    // A duplicated pointer will have the following two outer_fn arguments:
+                    // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
+                    // (..., metadata! enzyme_dup, ptr, ptr, ...).
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer);
+                    args.push(next_outer_arg);
+                    outer_pos += 2;
+                    activity_pos += 1;
+                }
+            } else {
+                // We do not differentiate with resprect to this argument.
+                // We already added the metadata and argument above, so just increase the counters.
+                outer_pos += 1;
+                activity_pos += 1;
+            }
+        }
+
+        let call = builder.call(enzyme_ty, None, None, ad_fn, &args, None, None);
+
+        // This part is a bit iffy. LLVM requires that a call to an inlineable function has some
+        // metadata attachted to it, but we just created this code oota. Given that the
+        // differentiated function already has partly confusing metadata, and given that this
+        // affects nothing but the auttodiff IR, we take a shortcut and just steal metadata from the
+        // dummy code which we inserted at a higher level.
+        // FIXME(ZuseZ4): Work with Enzyme core devs to clarify what debug metadata issues we have,
+        // and how to best improve it for enzyme core and rust-enzyme.
+        let md_ty = cx.get_md_kind_id("dbg");
+        if llvm::LLVMRustHasMetadata(last_inst, md_ty) {
+            let md = llvm::LLVMRustDIGetInstMetadata(last_inst)
+                .expect("failed to get instruction metadata");
+            let md_todiff = cx.get_metadata_value(md);
+            llvm::LLVMSetMetadata(call, md_ty, md_todiff);
+        } else {
+            // We don't panic, since depending on whether we are in debug or release mode, we might
+            // have no debug info to copy, which would then be ok.
+            trace!("no dbg info");
+        }
+        // Now that we copied the metadata, get rid of dummy code.
+        llvm::LLVMRustEraseInstBefore(entry, last_inst);
+        llvm::LLVMRustEraseInstFromParent(last_inst);
+
+        if cx.val_ty(outer_fn) != cx.type_void() {
+            builder.ret(call);
+        } else {
+            builder.ret_void();
+        }
+
+        // Let's crash in case that we messed something up above and generated invalid IR.
+        llvm::LLVMRustVerifyFunction(
+            outer_fn,
+            llvm::LLVMRustVerifierFailureAction::LLVMAbortProcessAction,
+        );
+    }
+}
+
+pub(crate) fn differentiate<'ll, 'tcx>(
+    module: &'ll ModuleCodegen<ModuleLlvm>,
+    cgcx: &CodegenContext<LlvmCodegenBackend>,
+    tcx: TyCtxt<'tcx>,
+    diff_items: Vec<AutoDiffItem>,
+    config: &ModuleConfig,
+) -> Result<(), FatalError> {
+    for item in &diff_items {
+        trace!("{}", item);
+    }
+
+    let diag_handler = cgcx.create_dcx();
+    let (_, cgus) = tcx.collect_and_partition_mono_items(());
+    let cx = context::CodegenCx::new(tcx, &cgus.first().unwrap(), &module.module_llvm);
+
+    // Before dumping the module, we want all the TypeTrees to become part of the module.
+    for item in diff_items.iter() {
+        let name = item.source.clone();
+        let fn_def: Option<&llvm::Value> = cx.get_function(&name);
+        let Some(fn_def) = fn_def else {
+            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
+                src: item.source.clone(),
+                target: item.target.clone(),
+                error: "could not find source function".to_owned(),
+            }));
+        };
+        debug!(?item.target);
+        let fn_target: Option<&llvm::Value> = cx.get_function(&item.target);
+        let Some(fn_target) = fn_target else {
+            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
+                src: item.source.clone(),
+                target: item.target.clone(),
+                error: "could not find target function".to_owned(),
+            }));
+        };
+
+        generate_enzyme_call(&cx, fn_def, fn_target, item.attrs.clone());
+    }
+
+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
+    if let Some(opt_level) = config.opt_level {
+        let opt_stage = match cgcx.lto {
+            Lto::Fat => llvm::OptStage::PreLinkFatLTO,
+            Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
+            _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
+            _ => llvm::OptStage::PreLinkNoLTO,
+        };
+        // This is our second opt call, so now we run all opts,
+        // to make sure we get the best performance.
+        let skip_size_increasing_opts = false;
+        trace!("running Module Optimization after differentiation");
+        unsafe {
+            llvm_optimize(
+                cgcx,
+                diag_handler.handle(),
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )?
+        };
+    }
+    trace!("done with differentiate()");
+
+    Ok(())
+}
diff --git a/compiler/rustc_codegen_llvm/src/callee.rs b/compiler/rustc_codegen_llvm/src/callee.rs
index ec77f32caf4..aa9a0f34f55 100644
--- a/compiler/rustc_codegen_llvm/src/callee.rs
+++ b/compiler/rustc_codegen_llvm/src/callee.rs
@@ -106,7 +106,7 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t
                 // This is a monomorphization of a generic function.
                 if !(cx.tcx.sess.opts.share_generics()
                     || tcx.codegen_fn_attrs(instance_def_id).inline
-                        == rustc_attr::InlineAttr::Never)
+                        == rustc_attr_parsing::InlineAttr::Never)
                 {
                     // When not sharing generics, all instances are in the same
                     // crate and have hidden visibility.
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index 8852dec7d9f..adfe8aeb5c5 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -302,10 +302,9 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                             (value, AddressSpace::DATA)
                         }
                     }
-                    GlobalAlloc::Function { instance, .. } => (
-                        self.get_fn_addr(instance.polymorphize(self.tcx)),
-                        self.data_layout().instruction_address_space,
-                    ),
+                    GlobalAlloc::Function { instance, .. } => {
+                        (self.get_fn_addr(instance), self.data_layout().instruction_address_space)
+                    }
                     GlobalAlloc::VTable(ty, dyn_ty) => {
                         let alloc = self
                             .tcx
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 8218126ea29..d8fbe51b975 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -1,6 +1,6 @@
 use std::borrow::Borrow;
 use std::cell::{Cell, RefCell};
-use std::ffi::{CStr, c_uint};
+use std::ffi::{CStr, c_char, c_uint};
 use std::str;
 
 use rustc_abi::{HasDataLayout, TargetDataLayout, VariantIdx};
@@ -159,6 +159,16 @@ pub(crate) unsafe fn create_module<'ll>(
             // See https://github.com/llvm/llvm-project/pull/112084
             target_data_layout = target_data_layout.replace("-i128:128", "");
         }
+        if sess.target.arch.starts_with("powerpc64") {
+            // LLVM 20 updates the powerpc64 layout to correctly align 128 bit integers to 128 bit.
+            // See https://github.com/llvm/llvm-project/pull/118004
+            target_data_layout = target_data_layout.replace("-i128:128", "");
+        }
+        if sess.target.arch.starts_with("wasm32") || sess.target.arch.starts_with("wasm64") {
+            // LLVM 20 updates the wasm(32|64) layout to correctly align 128 bit integers to 128 bit.
+            // See https://github.com/llvm/llvm-project/pull/119204
+            target_data_layout = target_data_layout.replace("-i128:128", "");
+        }
     }
 
     // Ensure the data-layout values hardcoded remain the defaults.
@@ -590,6 +600,31 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             llvm::set_section(g, c"llvm.metadata");
         }
     }
+
+    pub(crate) fn get_metadata_value(&self, metadata: &'ll Metadata) -> &'ll Value {
+        unsafe { llvm::LLVMMetadataAsValue(self.llcx, metadata) }
+    }
+
+    pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> {
+        let name = SmallCStr::new(name);
+        unsafe { llvm::LLVMGetNamedFunction(self.llmod, name.as_ptr()) }
+    }
+
+    pub(crate) fn get_md_kind_id(&self, name: &str) -> u32 {
+        unsafe {
+            llvm::LLVMGetMDKindIDInContext(
+                self.llcx,
+                name.as_ptr() as *const c_char,
+                name.len() as c_uint,
+            )
+        }
+    }
+
+    pub(crate) fn create_metadata(&self, name: String) -> Option<&'ll Metadata> {
+        Some(unsafe {
+            llvm::LLVMMDStringInContext2(self.llcx, name.as_ptr() as *const c_char, name.len())
+        })
+    }
 }
 
 impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
index a6e07ea2a60..b617f4d37f5 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
@@ -1,6 +1,4 @@
-use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId, SourceRegion};
-
-use crate::coverageinfo::mapgen::LocalFileId;
+use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId};
 
 /// Must match the layout of `LLVMRustCounterKind`.
 #[derive(Copy, Clone, Debug)]
@@ -126,29 +124,43 @@ pub(crate) struct CoverageSpan {
     /// Local index into the function's local-to-global file ID table.
     /// The value at that index is itself an index into the coverage filename
     /// table in the CGU's `__llvm_covmap` section.
-    file_id: u32,
+    pub(crate) file_id: u32,
 
     /// 1-based starting line of the source code span.
-    start_line: u32,
+    pub(crate) start_line: u32,
     /// 1-based starting column of the source code span.
-    start_col: u32,
+    pub(crate) start_col: u32,
     /// 1-based ending line of the source code span.
-    end_line: u32,
+    pub(crate) end_line: u32,
     /// 1-based ending column of the source code span. High bit must be unset.
-    end_col: u32,
+    pub(crate) end_col: u32,
+}
+
+/// Holds tables of the various region types in one struct.
+///
+/// Don't pass this struct across FFI; pass the individual region tables as
+/// pointer/length pairs instead.
+///
+/// Each field name has a `_regions` suffix for improved readability after
+/// exhaustive destructing, which ensures that all region types are handled.
+#[derive(Clone, Debug, Default)]
+pub(crate) struct Regions {
+    pub(crate) code_regions: Vec<CodeRegion>,
+    pub(crate) branch_regions: Vec<BranchRegion>,
+    pub(crate) mcdc_branch_regions: Vec<MCDCBranchRegion>,
+    pub(crate) mcdc_decision_regions: Vec<MCDCDecisionRegion>,
 }
 
-impl CoverageSpan {
-    pub(crate) fn from_source_region(
-        local_file_id: LocalFileId,
-        code_region: &SourceRegion,
-    ) -> Self {
-        let file_id = local_file_id.as_u32();
-        let &SourceRegion { start_line, start_col, end_line, end_col } = code_region;
-        // Internally, LLVM uses the high bit of `end_col` to distinguish between
-        // code regions and gap regions, so it can't be used by the column number.
-        assert!(end_col & (1u32 << 31) == 0, "high bit of `end_col` must be unset: {end_col:#X}");
-        Self { file_id, start_line, start_col, end_line, end_col }
+impl Regions {
+    /// Returns true if none of this structure's tables contain any regions.
+    pub(crate) fn has_no_regions(&self) -> bool {
+        let Self { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
+            self;
+
+        code_regions.is_empty()
+            && branch_regions.is_empty()
+            && mcdc_branch_regions.is_empty()
+            && mcdc_decision_regions.is_empty()
     }
 }
 
@@ -156,7 +168,7 @@ impl CoverageSpan {
 #[derive(Clone, Debug)]
 #[repr(C)]
 pub(crate) struct CodeRegion {
-    pub(crate) span: CoverageSpan,
+    pub(crate) cov_span: CoverageSpan,
     pub(crate) counter: Counter,
 }
 
@@ -164,7 +176,7 @@ pub(crate) struct CodeRegion {
 #[derive(Clone, Debug)]
 #[repr(C)]
 pub(crate) struct BranchRegion {
-    pub(crate) span: CoverageSpan,
+    pub(crate) cov_span: CoverageSpan,
     pub(crate) true_counter: Counter,
     pub(crate) false_counter: Counter,
 }
@@ -173,7 +185,7 @@ pub(crate) struct BranchRegion {
 #[derive(Clone, Debug)]
 #[repr(C)]
 pub(crate) struct MCDCBranchRegion {
-    pub(crate) span: CoverageSpan,
+    pub(crate) cov_span: CoverageSpan,
     pub(crate) true_counter: Counter,
     pub(crate) false_counter: Counter,
     pub(crate) mcdc_branch_params: mcdc::BranchParameters,
@@ -183,6 +195,6 @@ pub(crate) struct MCDCBranchRegion {
 #[derive(Clone, Debug)]
 #[repr(C)]
 pub(crate) struct MCDCDecisionRegion {
-    pub(crate) span: CoverageSpan,
+    pub(crate) cov_span: CoverageSpan,
     pub(crate) mcdc_decision_params: mcdc::DecisionParameters,
 }
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
index 99c2d12b261..2cd7fa3225a 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
@@ -40,11 +40,10 @@ pub(crate) fn create_pgo_func_name_var<'ll>(
     }
 }
 
-pub(crate) fn write_filenames_to_buffer<'a>(
-    filenames: impl IntoIterator<Item = &'a str>,
-) -> Vec<u8> {
+pub(crate) fn write_filenames_to_buffer(filenames: &[impl AsRef<str>]) -> Vec<u8> {
     let (pointers, lengths) = filenames
         .into_iter()
+        .map(AsRef::as_ref)
         .map(|s: &str| (s.as_c_char_ptr(), s.len()))
         .unzip::<_, _, Vec<_>, Vec<_>>();
 
@@ -62,11 +61,10 @@ pub(crate) fn write_filenames_to_buffer<'a>(
 pub(crate) fn write_function_mappings_to_buffer(
     virtual_file_mapping: &[u32],
     expressions: &[ffi::CounterExpression],
-    code_regions: &[ffi::CodeRegion],
-    branch_regions: &[ffi::BranchRegion],
-    mcdc_branch_regions: &[ffi::MCDCBranchRegion],
-    mcdc_decision_regions: &[ffi::MCDCDecisionRegion],
+    regions: &ffi::Regions,
 ) -> Vec<u8> {
+    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
+        regions;
     llvm::build_byte_buffer(|buffer| unsafe {
         llvm::LLVMRustCoverageWriteFunctionMappingsToBuffer(
             virtual_file_mapping.as_ptr(),
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
deleted file mode 100644
index 0752c718c70..00000000000
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
+++ /dev/null
@@ -1,232 +0,0 @@
-use rustc_data_structures::captures::Captures;
-use rustc_data_structures::fx::FxIndexSet;
-use rustc_index::bit_set::BitSet;
-use rustc_middle::mir::CoverageIdsInfo;
-use rustc_middle::mir::coverage::{
-    CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, MappingKind, Op,
-    SourceRegion,
-};
-use rustc_middle::ty::Instance;
-use tracing::debug;
-
-use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};
-
-/// Holds all of the coverage mapping data associated with a function instance,
-/// collected during traversal of `Coverage` statements in the function's MIR.
-#[derive(Debug)]
-pub(crate) struct FunctionCoverageCollector<'tcx> {
-    /// Coverage info that was attached to this function by the instrumentor.
-    function_coverage_info: &'tcx FunctionCoverageInfo,
-    ids_info: &'tcx CoverageIdsInfo,
-    is_used: bool,
-}
-
-impl<'tcx> FunctionCoverageCollector<'tcx> {
-    /// Creates a new set of coverage data for a used (called) function.
-    pub(crate) fn new(
-        instance: Instance<'tcx>,
-        function_coverage_info: &'tcx FunctionCoverageInfo,
-        ids_info: &'tcx CoverageIdsInfo,
-    ) -> Self {
-        Self::create(instance, function_coverage_info, ids_info, true)
-    }
-
-    /// Creates a new set of coverage data for an unused (never called) function.
-    pub(crate) fn unused(
-        instance: Instance<'tcx>,
-        function_coverage_info: &'tcx FunctionCoverageInfo,
-        ids_info: &'tcx CoverageIdsInfo,
-    ) -> Self {
-        Self::create(instance, function_coverage_info, ids_info, false)
-    }
-
-    fn create(
-        instance: Instance<'tcx>,
-        function_coverage_info: &'tcx FunctionCoverageInfo,
-        ids_info: &'tcx CoverageIdsInfo,
-        is_used: bool,
-    ) -> Self {
-        let num_counters = function_coverage_info.num_counters;
-        let num_expressions = function_coverage_info.expressions.len();
-        debug!(
-            "FunctionCoverage::create(instance={instance:?}) has \
-            num_counters={num_counters}, num_expressions={num_expressions}, is_used={is_used}"
-        );
-
-        Self { function_coverage_info, ids_info, is_used }
-    }
-
-    /// Identify expressions that will always have a value of zero, and note
-    /// their IDs in [`ZeroExpressions`]. Mappings that refer to a zero expression
-    /// can instead become mappings to a constant zero value.
-    ///
-    /// This method mainly exists to preserve the simplifications that were
-    /// already being performed by the Rust-side expression renumbering, so that
-    /// the resulting coverage mappings don't get worse.
-    fn identify_zero_expressions(&self) -> ZeroExpressions {
-        // The set of expressions that either were optimized out entirely, or
-        // have zero as both of their operands, and will therefore always have
-        // a value of zero. Other expressions that refer to these as operands
-        // can have those operands replaced with `CovTerm::Zero`.
-        let mut zero_expressions = ZeroExpressions::default();
-
-        // Simplify a copy of each expression based on lower-numbered expressions,
-        // and then update the set of always-zero expressions if necessary.
-        // (By construction, expressions can only refer to other expressions
-        // that have lower IDs, so one pass is sufficient.)
-        for (id, expression) in self.function_coverage_info.expressions.iter_enumerated() {
-            if !self.is_used || !self.ids_info.expressions_seen.contains(id) {
-                // If an expression was not seen, it must have been optimized away,
-                // so any operand that refers to it can be replaced with zero.
-                zero_expressions.insert(id);
-                continue;
-            }
-
-            // We don't need to simplify the actual expression data in the
-            // expressions list; we can just simplify a temporary copy and then
-            // use that to update the set of always-zero expressions.
-            let Expression { mut lhs, op, mut rhs } = *expression;
-
-            // If an expression has an operand that is also an expression, the
-            // operand's ID must be strictly lower. This is what lets us find
-            // all zero expressions in one pass.
-            let assert_operand_expression_is_lower = |operand_id: ExpressionId| {
-                assert!(
-                    operand_id < id,
-                    "Operand {operand_id:?} should be less than {id:?} in {expression:?}",
-                )
-            };
-
-            // If an operand refers to a counter or expression that is always
-            // zero, then that operand can be replaced with `CovTerm::Zero`.
-            let maybe_set_operand_to_zero = |operand: &mut CovTerm| {
-                if let CovTerm::Expression(id) = *operand {
-                    assert_operand_expression_is_lower(id);
-                }
-
-                if is_zero_term(&self.ids_info.counters_seen, &zero_expressions, *operand) {
-                    *operand = CovTerm::Zero;
-                }
-            };
-            maybe_set_operand_to_zero(&mut lhs);
-            maybe_set_operand_to_zero(&mut rhs);
-
-            // Coverage counter values cannot be negative, so if an expression
-            // involves subtraction from zero, assume that its RHS must also be zero.
-            // (Do this after simplifications that could set the LHS to zero.)
-            if lhs == CovTerm::Zero && op == Op::Subtract {
-                rhs = CovTerm::Zero;
-            }
-
-            // After the above simplifications, if both operands are zero, then
-            // we know that this expression is always zero too.
-            if lhs == CovTerm::Zero && rhs == CovTerm::Zero {
-                zero_expressions.insert(id);
-            }
-        }
-
-        zero_expressions
-    }
-
-    pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> {
-        let zero_expressions = self.identify_zero_expressions();
-        let FunctionCoverageCollector { function_coverage_info, ids_info, is_used, .. } = self;
-
-        FunctionCoverage { function_coverage_info, ids_info, is_used, zero_expressions }
-    }
-}
-
-pub(crate) struct FunctionCoverage<'tcx> {
-    pub(crate) function_coverage_info: &'tcx FunctionCoverageInfo,
-    ids_info: &'tcx CoverageIdsInfo,
-    is_used: bool,
-
-    zero_expressions: ZeroExpressions,
-}
-
-impl<'tcx> FunctionCoverage<'tcx> {
-    /// Returns true for a used (called) function, and false for an unused function.
-    pub(crate) fn is_used(&self) -> bool {
-        self.is_used
-    }
-
-    /// Return the source hash, generated from the HIR node structure, and used to indicate whether
-    /// or not the source code structure changed between different compilations.
-    pub(crate) fn source_hash(&self) -> u64 {
-        if self.is_used { self.function_coverage_info.function_source_hash } else { 0 }
-    }
-
-    /// Convert this function's coverage expression data into a form that can be
-    /// passed through FFI to LLVM.
-    pub(crate) fn counter_expressions(
-        &self,
-    ) -> impl Iterator<Item = CounterExpression> + ExactSizeIterator + Captures<'_> {
-        // We know that LLVM will optimize out any unused expressions before
-        // producing the final coverage map, so there's no need to do the same
-        // thing on the Rust side unless we're confident we can do much better.
-        // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.)
-
-        self.function_coverage_info.expressions.iter().map(move |&Expression { lhs, op, rhs }| {
-            CounterExpression {
-                lhs: self.counter_for_term(lhs),
-                kind: match op {
-                    Op::Add => ExprKind::Add,
-                    Op::Subtract => ExprKind::Subtract,
-                },
-                rhs: self.counter_for_term(rhs),
-            }
-        })
-    }
-
-    /// Converts this function's coverage mappings into an intermediate form
-    /// that will be used by `mapgen` when preparing for FFI.
-    pub(crate) fn counter_regions(
-        &self,
-    ) -> impl Iterator<Item = (MappingKind, &SourceRegion)> + ExactSizeIterator {
-        self.function_coverage_info.mappings.iter().map(move |mapping| {
-            let Mapping { kind, source_region } = mapping;
-            let kind =
-                kind.map_terms(|term| if self.is_zero_term(term) { CovTerm::Zero } else { term });
-            (kind, source_region)
-        })
-    }
-
-    fn counter_for_term(&self, term: CovTerm) -> Counter {
-        if self.is_zero_term(term) { Counter::ZERO } else { Counter::from_term(term) }
-    }
-
-    fn is_zero_term(&self, term: CovTerm) -> bool {
-        !self.is_used || is_zero_term(&self.ids_info.counters_seen, &self.zero_expressions, term)
-    }
-}
-
-/// Set of expression IDs that are known to always evaluate to zero.
-/// Any mapping or expression operand that refers to these expressions can have
-/// that reference replaced with a constant zero value.
-#[derive(Default)]
-struct ZeroExpressions(FxIndexSet<ExpressionId>);
-
-impl ZeroExpressions {
-    fn insert(&mut self, id: ExpressionId) {
-        self.0.insert(id);
-    }
-
-    fn contains(&self, id: ExpressionId) -> bool {
-        self.0.contains(&id)
-    }
-}
-
-/// Returns `true` if the given term is known to have a value of zero, taking
-/// into account knowledge of which counters are unused and which expressions
-/// are always zero.
-fn is_zero_term(
-    counters_seen: &BitSet<CounterId>,
-    zero_expressions: &ZeroExpressions,
-    term: CovTerm,
-) -> bool {
-    match term {
-        CovTerm::Zero => true,
-        CovTerm::Counter(id) => !counters_seen.contains(id),
-        CovTerm::Expression(id) => zero_expressions.contains(id),
-    }
-}
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index 8c24579fa7c..b3ad2a0e409 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -1,29 +1,29 @@
-use std::ffi::CString;
-use std::iter;
+use std::sync::Arc;
 
-use itertools::Itertools as _;
+use itertools::Itertools;
 use rustc_abi::Align;
 use rustc_codegen_ssa::traits::{
     BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
 };
-use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet};
+use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_index::IndexVec;
-use rustc_middle::mir::coverage::MappingKind;
+use rustc_middle::mir;
 use rustc_middle::ty::{self, TyCtxt};
-use rustc_middle::{bug, mir};
 use rustc_session::RemapFileNameExt;
 use rustc_session::config::RemapPathScopeComponents;
 use rustc_span::def_id::DefIdSet;
-use rustc_span::{Span, Symbol};
-use rustc_target::spec::HasTargetSpec;
+use rustc_span::{SourceFile, StableSourceFileId};
 use tracing::debug;
 
 use crate::common::CodegenCx;
-use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector};
-use crate::coverageinfo::{ffi, llvm_cov};
+use crate::coverageinfo::llvm_cov;
+use crate::coverageinfo::mapgen::covfun::prepare_covfun_record;
 use crate::llvm;
 
+mod covfun;
+mod spans;
+
 /// Generates and exports the coverage map, which is embedded in special
 /// linker sections in the final binary.
 ///
@@ -49,83 +49,62 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
 
     debug!("Generating coverage map for CodegenUnit: `{}`", cx.codegen_unit.name());
 
-    // In order to show that unused functions have coverage counts of zero (0), LLVM requires the
-    // functions exist. Generate synthetic functions with a (required) single counter, and add the
-    // MIR `Coverage` code regions to the `function_coverage_map`, before calling
-    // `ctx.take_function_coverage_map()`.
-    if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
-        add_unused_functions(cx);
-    }
-
     // FIXME(#132395): Can this be none even when coverage is enabled?
-    let function_coverage_map = match cx.coverage_cx {
-        Some(ref cx) => cx.take_function_coverage_map(),
+    let instances_used = match cx.coverage_cx {
+        Some(ref cx) => cx.instances_used.borrow(),
         None => return,
     };
-    if function_coverage_map.is_empty() {
-        // This module has no functions with coverage instrumentation
-        return;
-    }
 
-    let function_coverage_entries = function_coverage_map
-        .into_iter()
-        .map(|(instance, function_coverage)| (instance, function_coverage.into_finished()))
-        .collect::<Vec<_>>();
+    // The order of entries in this global file table needs to be deterministic,
+    // and ideally should also be independent of the details of stable-hashing,
+    // because coverage tests snapshots (`.cov-map`) can observe the order and
+    // would need to be re-blessed if it changes. As long as those requirements
+    // are satisfied, the order can be arbitrary.
+    let mut global_file_table = GlobalFileTable::new();
 
-    let all_file_names = function_coverage_entries
+    let mut covfun_records = instances_used
         .iter()
-        .map(|(_, fn_cov)| fn_cov.function_coverage_info.body_span)
-        .map(|span| span_file_name(tcx, span));
-    let global_file_table = GlobalFileTable::new(all_file_names);
-
-    // Encode all filenames referenced by coverage mappings in this CGU.
-    let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
-
-    let filenames_size = filenames_buffer.len();
-    let filenames_val = cx.const_bytes(&filenames_buffer);
-    let filenames_ref = llvm_cov::hash_bytes(&filenames_buffer);
-
-    // Generate the coverage map header, which contains the filenames used by
-    // this CGU's coverage mappings, and store it in a well-known global.
-    generate_covmap_record(cx, covmap_version, filenames_size, filenames_val);
-
-    let mut unused_function_names = Vec::new();
+        .copied()
+        // Sort by symbol name, so that the global file table is built in an
+        // order that doesn't depend on the stable-hash-based order in which
+        // instances were visited during codegen.
+        .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name)
+        .filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true))
+        .collect::<Vec<_>>();
 
-    // Encode coverage mappings and generate function records
-    for (instance, function_coverage) in function_coverage_entries {
-        debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
+    // In a single designated CGU, also prepare covfun records for functions
+    // in this crate that were instrumented for coverage, but are unused.
+    if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
+        let mut unused_instances = gather_unused_function_instances(cx);
+        // Sort the unused instances by symbol name, for the same reason as the used ones.
+        unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name);
+        covfun_records.extend(unused_instances.into_iter().filter_map(|instance| {
+            prepare_covfun_record(tcx, &mut global_file_table, instance, false)
+        }));
+    }
 
-        let mangled_function_name = tcx.symbol_name(instance).name;
-        let source_hash = function_coverage.source_hash();
-        let is_used = function_coverage.is_used();
+    // If there are no covfun records for this CGU, don't generate a covmap record.
+    // Emitting a covmap record without any covfun records causes `llvm-cov` to
+    // fail when generating coverage reports, and if there are no covfun records
+    // then the covmap record isn't useful anyway.
+    // This should prevent a repeat of <https://github.com/rust-lang/rust/issues/133606>.
+    if covfun_records.is_empty() {
+        return;
+    }
 
-        let coverage_mapping_buffer =
-            encode_mappings_for_function(tcx, &global_file_table, &function_coverage);
+    // Encode all filenames referenced by coverage mappings in this CGU.
+    let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
+    // The `llvm-cov` tool uses this hash to associate each covfun record with
+    // its corresponding filenames table, since the final binary will typically
+    // contain multiple covmap records from different compilation units.
+    let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
 
-        if coverage_mapping_buffer.is_empty() {
-            if function_coverage.is_used() {
-                bug!(
-                    "A used function should have had coverage mapping data but did not: {}",
-                    mangled_function_name
-                );
-            } else {
-                debug!("unused function had no coverage mapping data: {}", mangled_function_name);
-                continue;
-            }
-        }
+    let mut unused_function_names = vec![];
 
-        if !is_used {
-            unused_function_names.push(mangled_function_name);
-        }
+    for covfun in &covfun_records {
+        unused_function_names.extend(covfun.mangled_function_name_if_unused());
 
-        generate_covfun_record(
-            cx,
-            mangled_function_name,
-            source_hash,
-            filenames_ref,
-            coverage_mapping_buffer,
-            is_used,
-        );
+        covfun::generate_covfun_record(cx, filenames_hash, covfun)
     }
 
     // For unused functions, we need to take their mangled names and store them
@@ -146,56 +125,58 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
         llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
         llvm::set_initializer(array, initializer);
     }
+
+    // Generate the coverage map header, which contains the filenames used by
+    // this CGU's coverage mappings, and store it in a well-known global.
+    // (This is skipped if we returned early due to having no covfun records.)
+    generate_covmap_record(cx, covmap_version, &filenames_buffer);
 }
 
-/// Maps "global" (per-CGU) file ID numbers to their underlying filenames.
+/// Maps "global" (per-CGU) file ID numbers to their underlying source files.
 struct GlobalFileTable {
-    /// This "raw" table doesn't include the working dir, so a filename's
+    /// This "raw" table doesn't include the working dir, so a file's
     /// global ID is its index in this set **plus one**.
-    raw_file_table: FxIndexSet<Symbol>,
+    raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>,
 }
 
 impl GlobalFileTable {
-    fn new(all_file_names: impl IntoIterator<Item = Symbol>) -> Self {
-        // Collect all of the filenames into a set. Filenames usually come in
-        // contiguous runs, so we can dedup adjacent ones to save work.
-        let mut raw_file_table = all_file_names.into_iter().dedup().collect::<FxIndexSet<Symbol>>();
-
-        // Sort the file table by its actual string values, not the arbitrary
-        // ordering of its symbols.
-        raw_file_table.sort_unstable_by(|a, b| a.as_str().cmp(b.as_str()));
-
-        Self { raw_file_table }
+    fn new() -> Self {
+        Self { raw_file_table: FxIndexMap::default() }
     }
 
-    fn global_file_id_for_file_name(&self, file_name: Symbol) -> GlobalFileId {
-        let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| {
-            bug!("file name not found in prepared global file table: {file_name}");
-        });
+    fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId {
+        // Ensure the given file has a table entry, and get its index.
+        let entry = self.raw_file_table.entry(file.stable_id);
+        let raw_id = entry.index();
+        entry.or_insert_with(|| Arc::clone(file));
+
         // The raw file table doesn't include an entry for the working dir
         // (which has ID 0), so add 1 to get the correct ID.
         GlobalFileId::from_usize(raw_id + 1)
     }
 
     fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
+        let mut table = Vec::with_capacity(self.raw_file_table.len() + 1);
+
         // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
         // requires setting the first filename to the compilation directory.
         // Since rustc generates coverage maps with relative paths, the
         // compilation directory can be combined with the relative paths
         // to get absolute paths, if needed.
-        use rustc_session::RemapFileNameExt;
-        use rustc_session::config::RemapPathScopeComponents;
-        let working_dir: &str = &tcx
-            .sess
-            .opts
-            .working_dir
-            .for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
-            .to_string_lossy();
-
-        // Insert the working dir at index 0, before the other filenames.
-        let filenames =
-            iter::once(working_dir).chain(self.raw_file_table.iter().map(Symbol::as_str));
-        llvm_cov::write_filenames_to_buffer(filenames)
+        table.push(
+            tcx.sess
+                .opts
+                .working_dir
+                .for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
+                .to_string_lossy(),
+        );
+
+        // Add the regular entries after the base directory.
+        table.extend(self.raw_file_table.values().map(|file| {
+            file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
+        }));
+
+        llvm_cov::write_filenames_to_buffer(&table)
     }
 }
 
@@ -208,12 +189,12 @@ rustc_index::newtype_index! {
     /// An index into a function's list of global file IDs. That underlying list
     /// of local-to-global mappings will be embedded in the function's record in
     /// the `__llvm_covfun` linker section.
-    pub(crate) struct LocalFileId {}
+    struct LocalFileId {}
 }
 
 /// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
 /// file IDs.
-#[derive(Default)]
+#[derive(Debug, Default)]
 struct VirtualFileMapping {
     local_to_global: IndexVec<LocalFileId, GlobalFileId>,
     global_to_local: FxIndexMap<GlobalFileId, LocalFileId>,
@@ -227,187 +208,45 @@ impl VirtualFileMapping {
             .or_insert_with(|| self.local_to_global.push(global_file_id))
     }
 
-    fn into_vec(self) -> Vec<u32> {
-        // This conversion should be optimized away to ~zero overhead.
-        // In any case, it's probably not hot enough to worry about.
-        self.local_to_global.into_iter().map(|global| global.as_u32()).collect()
-    }
-}
-
-fn span_file_name(tcx: TyCtxt<'_>, span: Span) -> Symbol {
-    let source_file = tcx.sess.source_map().lookup_source_file(span.lo());
-    let name =
-        source_file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy();
-    Symbol::intern(&name)
-}
-
-/// Using the expressions and counter regions collected for a single function,
-/// generate the variable-sized payload of its corresponding `__llvm_covfun`
-/// entry. The payload is returned as a vector of bytes.
-///
-/// Newly-encountered filenames will be added to the global file table.
-fn encode_mappings_for_function(
-    tcx: TyCtxt<'_>,
-    global_file_table: &GlobalFileTable,
-    function_coverage: &FunctionCoverage<'_>,
-) -> Vec<u8> {
-    let counter_regions = function_coverage.counter_regions();
-    if counter_regions.is_empty() {
-        return Vec::new();
-    }
-
-    let expressions = function_coverage.counter_expressions().collect::<Vec<_>>();
-
-    let mut virtual_file_mapping = VirtualFileMapping::default();
-    let mut code_regions = vec![];
-    let mut branch_regions = vec![];
-    let mut mcdc_branch_regions = vec![];
-    let mut mcdc_decision_regions = vec![];
-
-    // Currently a function's mappings must all be in the same file as its body span.
-    let file_name = span_file_name(tcx, function_coverage.function_coverage_info.body_span);
-
-    // Look up the global file ID for that filename.
-    let global_file_id = global_file_table.global_file_id_for_file_name(file_name);
-
-    // Associate that global file ID with a local file ID for this function.
-    let local_file_id = virtual_file_mapping.local_id_for_global(global_file_id);
-    debug!("  file id: {local_file_id:?} => {global_file_id:?} = '{file_name:?}'");
-
-    // For each counter/region pair in this function+file, convert it to a
-    // form suitable for FFI.
-    for (mapping_kind, region) in counter_regions {
-        debug!("Adding counter {mapping_kind:?} to map for {region:?}");
-        let span = ffi::CoverageSpan::from_source_region(local_file_id, region);
-        match mapping_kind {
-            MappingKind::Code(term) => {
-                code_regions.push(ffi::CodeRegion { span, counter: ffi::Counter::from_term(term) });
-            }
-            MappingKind::Branch { true_term, false_term } => {
-                branch_regions.push(ffi::BranchRegion {
-                    span,
-                    true_counter: ffi::Counter::from_term(true_term),
-                    false_counter: ffi::Counter::from_term(false_term),
-                });
-            }
-            MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => {
-                mcdc_branch_regions.push(ffi::MCDCBranchRegion {
-                    span,
-                    true_counter: ffi::Counter::from_term(true_term),
-                    false_counter: ffi::Counter::from_term(false_term),
-                    mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params),
-                });
-            }
-            MappingKind::MCDCDecision(mcdc_decision_params) => {
-                mcdc_decision_regions.push(ffi::MCDCDecisionRegion {
-                    span,
-                    mcdc_decision_params: ffi::mcdc::DecisionParameters::from(mcdc_decision_params),
-                });
-            }
-        }
+    fn to_vec(&self) -> Vec<u32> {
+        // This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`,
+        // but it isn't hot or expensive enough to justify the extra unsafety.
+        self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect()
     }
-
-    // Encode the function's coverage mappings into a buffer.
-    llvm_cov::write_function_mappings_to_buffer(
-        &virtual_file_mapping.into_vec(),
-        &expressions,
-        &code_regions,
-        &branch_regions,
-        &mcdc_branch_regions,
-        &mcdc_decision_regions,
-    )
 }
 
 /// Generates the contents of the covmap record for this CGU, which mostly
 /// consists of a header and a list of filenames. The record is then stored
 /// as a global variable in the `__llvm_covmap` section.
-fn generate_covmap_record<'ll>(
-    cx: &CodegenCx<'ll, '_>,
-    version: u32,
-    filenames_size: usize,
-    filenames_val: &'ll llvm::Value,
-) {
-    debug!("cov map: filenames_size = {}, 0-based version = {}", filenames_size, version);
-
-    // Create the coverage data header (Note, fields 0 and 2 are now always zero,
-    // as of `llvm::coverage::CovMapVersion::Version4`.)
-    let zero_was_n_records_val = cx.const_u32(0);
-    let filenames_size_val = cx.const_u32(filenames_size as u32);
-    let zero_was_coverage_size_val = cx.const_u32(0);
-    let version_val = cx.const_u32(version);
-    let cov_data_header_val = cx.const_struct(
-        &[zero_was_n_records_val, filenames_size_val, zero_was_coverage_size_val, version_val],
-        /*packed=*/ false,
-    );
-
-    // Create the complete LLVM coverage data value to add to the LLVM IR
-    let covmap_data =
-        cx.const_struct(&[cov_data_header_val, filenames_val], /*packed=*/ false);
-
-    let llglobal = llvm::add_global(cx.llmod, cx.val_ty(covmap_data), &llvm_cov::covmap_var_name());
-    llvm::set_initializer(llglobal, covmap_data);
-    llvm::set_global_constant(llglobal, true);
-    llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage);
-    llvm::set_section(llglobal, &llvm_cov::covmap_section_name(cx.llmod));
-    // LLVM's coverage mapping format specifies 8-byte alignment for items in this section.
-    // <https://llvm.org/docs/CoverageMappingFormat.html>
-    llvm::set_alignment(llglobal, Align::EIGHT);
-    cx.add_used_global(llglobal);
-}
-
-/// Generates the contents of the covfun record for this function, which
-/// contains the function's coverage mapping data. The record is then stored
-/// as a global variable in the `__llvm_covfun` section.
-fn generate_covfun_record(
-    cx: &CodegenCx<'_, '_>,
-    mangled_function_name: &str,
-    source_hash: u64,
-    filenames_ref: u64,
-    coverage_mapping_buffer: Vec<u8>,
-    is_used: bool,
-) {
-    // Concatenate the encoded coverage mappings
-    let coverage_mapping_size = coverage_mapping_buffer.len();
-    let coverage_mapping_val = cx.const_bytes(&coverage_mapping_buffer);
-
-    let func_name_hash = llvm_cov::hash_bytes(mangled_function_name.as_bytes());
-    let func_name_hash_val = cx.const_u64(func_name_hash);
-    let coverage_mapping_size_val = cx.const_u32(coverage_mapping_size as u32);
-    let source_hash_val = cx.const_u64(source_hash);
-    let filenames_ref_val = cx.const_u64(filenames_ref);
-    let func_record_val = cx.const_struct(
+fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_buffer: &[u8]) {
+    // A covmap record consists of four target-endian u32 values, followed by
+    // the encoded filenames table. Two of the header fields are unused in
+    // modern versions of the LLVM coverage mapping format, and are always 0.
+    // <https://llvm.org/docs/CoverageMappingFormat.html#llvm-ir-representation>
+    // See also `src/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp`.
+    let covmap_header = cx.const_struct(
         &[
-            func_name_hash_val,
-            coverage_mapping_size_val,
-            source_hash_val,
-            filenames_ref_val,
-            coverage_mapping_val,
+            cx.const_u32(0), // (unused)
+            cx.const_u32(filenames_buffer.len() as u32),
+            cx.const_u32(0), // (unused)
+            cx.const_u32(version),
         ],
-        /*packed=*/ true,
+        /* packed */ false,
     );
-
-    // Choose a variable name to hold this function's covfun data.
-    // Functions that are used have a suffix ("u") to distinguish them from
-    // unused copies of the same function (from different CGUs), so that if a
-    // linker sees both it won't discard the used copy's data.
-    let func_record_var_name =
-        CString::new(format!("__covrec_{:X}{}", func_name_hash, if is_used { "u" } else { "" }))
-            .unwrap();
-    debug!("function record var name: {:?}", func_record_var_name);
-
-    let llglobal = llvm::add_global(cx.llmod, cx.val_ty(func_record_val), &func_record_var_name);
-    llvm::set_initializer(llglobal, func_record_val);
-    llvm::set_global_constant(llglobal, true);
-    llvm::set_linkage(llglobal, llvm::Linkage::LinkOnceODRLinkage);
-    llvm::set_visibility(llglobal, llvm::Visibility::Hidden);
-    llvm::set_section(llglobal, cx.covfun_section_name());
+    let covmap_record = cx
+        .const_struct(&[covmap_header, cx.const_bytes(filenames_buffer)], /* packed */ false);
+
+    let covmap_global =
+        llvm::add_global(cx.llmod, cx.val_ty(covmap_record), &llvm_cov::covmap_var_name());
+    llvm::set_initializer(covmap_global, covmap_record);
+    llvm::set_global_constant(covmap_global, true);
+    llvm::set_linkage(covmap_global, llvm::Linkage::PrivateLinkage);
+    llvm::set_section(covmap_global, &llvm_cov::covmap_section_name(cx.llmod));
     // LLVM's coverage mapping format specifies 8-byte alignment for items in this section.
     // <https://llvm.org/docs/CoverageMappingFormat.html>
-    llvm::set_alignment(llglobal, Align::EIGHT);
-    if cx.target_spec().supports_comdat() {
-        llvm::set_comdat(cx.llmod, llglobal, &func_record_var_name);
-    }
-    cx.add_used_global(llglobal);
+    llvm::set_alignment(covmap_global, Align::EIGHT);
+
+    cx.add_used_global(covmap_global);
 }
 
 /// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
@@ -418,39 +257,35 @@ fn generate_covfun_record(
 /// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
 /// We also end up adding their symbol names to a special global array that LLVM will include in
 /// its embedded coverage data.
-fn add_unused_functions(cx: &CodegenCx<'_, '_>) {
+fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<ty::Instance<'tcx>> {
     assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
 
     let tcx = cx.tcx;
     let usage = prepare_usage_sets(tcx);
 
     let is_unused_fn = |def_id: LocalDefId| -> bool {
-        let def_id = def_id.to_def_id();
-
-        // To be eligible for "unused function" mappings, a definition must:
-        // - Be function-like
+        // Usage sets expect `DefId`, so convert from `LocalDefId`.
+        let d: DefId = LocalDefId::to_def_id(def_id);
+        // To be potentially eligible for "unused function" mappings, a definition must:
+        // - Be eligible for coverage instrumentation
         // - Not participate directly in codegen (or have lost all its coverage statements)
         // - Not have any coverage statements inlined into codegenned functions
-        tcx.def_kind(def_id).is_fn_like()
-            && (!usage.all_mono_items.contains(&def_id)
-                || usage.missing_own_coverage.contains(&def_id))
-            && !usage.used_via_inlining.contains(&def_id)
+        tcx.is_eligible_for_coverage(def_id)
+            && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
+            && !usage.used_via_inlining.contains(&d)
     };
 
-    // Scan for unused functions that were instrumented for coverage.
-    for def_id in tcx.mir_keys(()).iter().copied().filter(|&def_id| is_unused_fn(def_id)) {
-        // Get the coverage info from MIR, skipping functions that were never instrumented.
-        let body = tcx.optimized_mir(def_id);
-        let Some(function_coverage_info) = body.function_coverage_info.as_deref() else { continue };
+    // FIXME(#79651): Consider trying to filter out dummy instantiations of
+    // unused generic functions from library crates, because they can produce
+    // "unused instantiation" in coverage reports even when they are actually
+    // used by some downstream crate in the same binary.
 
-        // FIXME(79651): Consider trying to filter out dummy instantiations of
-        // unused generic functions from library crates, because they can produce
-        // "unused instantiation" in coverage reports even when they are actually
-        // used by some downstream crate in the same binary.
-
-        debug!("generating unused fn: {def_id:?}");
-        add_unused_function_coverage(cx, def_id, function_coverage_info);
-    }
+    tcx.mir_keys(())
+        .iter()
+        .copied()
+        .filter(|&def_id| is_unused_fn(def_id))
+        .map(|def_id| make_dummy_instance(tcx, def_id))
+        .collect::<Vec<_>>()
 }
 
 struct UsageSets<'tcx> {
@@ -515,16 +350,11 @@ fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
     UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
 }
 
-fn add_unused_function_coverage<'tcx>(
-    cx: &CodegenCx<'_, 'tcx>,
-    def_id: LocalDefId,
-    function_coverage_info: &'tcx mir::coverage::FunctionCoverageInfo,
-) {
-    let tcx = cx.tcx;
-    let def_id = def_id.to_def_id();
+fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
+    let def_id = local_def_id.to_def_id();
 
     // Make a dummy instance that fills in all generics with placeholders.
-    let instance = ty::Instance::new(
+    ty::Instance::new(
         def_id,
         ty::GenericArgs::for_item(tcx, def_id, |param, _| {
             if let ty::GenericParamDefKind::Lifetime = param.kind {
@@ -533,14 +363,5 @@ fn add_unused_function_coverage<'tcx>(
                 tcx.mk_param_from_def(param)
             }
         }),
-    );
-
-    // An unused function's mappings will all be rewritten to map to zero.
-    let function_coverage = FunctionCoverageCollector::unused(
-        instance,
-        function_coverage_info,
-        tcx.coverage_ids_info(instance.def),
-    );
-
-    cx.coverage_cx().function_coverage_map.borrow_mut().insert(instance, function_coverage);
+    )
 }
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
new file mode 100644
index 00000000000..5428d776f41
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -0,0 +1,252 @@
+//! For each function that was instrumented for coverage, we need to embed its
+//! corresponding coverage mapping metadata inside the `__llvm_covfun`[^win]
+//! linker section of the final binary.
+//!
+//! [^win]: On Windows the section name is `.lcovfun`.
+
+use std::ffi::CString;
+
+use rustc_abi::Align;
+use rustc_codegen_ssa::traits::{
+    BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
+};
+use rustc_middle::mir::coverage::{
+    CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping, MappingKind, Op,
+};
+use rustc_middle::ty::{Instance, TyCtxt};
+use rustc_span::Span;
+use rustc_target::spec::HasTargetSpec;
+use tracing::debug;
+
+use crate::common::CodegenCx;
+use crate::coverageinfo::mapgen::{GlobalFileTable, VirtualFileMapping, spans};
+use crate::coverageinfo::{ffi, llvm_cov};
+use crate::llvm;
+
+/// Intermediate coverage metadata for a single function, used to help build
+/// the final record that will be embedded in the `__llvm_covfun` section.
+#[derive(Debug)]
+pub(crate) struct CovfunRecord<'tcx> {
+    mangled_function_name: &'tcx str,
+    source_hash: u64,
+    is_used: bool,
+
+    virtual_file_mapping: VirtualFileMapping,
+    expressions: Vec<ffi::CounterExpression>,
+    regions: ffi::Regions,
+}
+
+impl<'tcx> CovfunRecord<'tcx> {
+    /// FIXME(Zalathar): Make this the responsibility of the code that determines
+    /// which functions are unused.
+    pub(crate) fn mangled_function_name_if_unused(&self) -> Option<&'tcx str> {
+        (!self.is_used).then_some(self.mangled_function_name)
+    }
+}
+
+pub(crate) fn prepare_covfun_record<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    global_file_table: &mut GlobalFileTable,
+    instance: Instance<'tcx>,
+    is_used: bool,
+) -> Option<CovfunRecord<'tcx>> {
+    let fn_cov_info = tcx.instance_mir(instance.def).function_coverage_info.as_deref()?;
+    let ids_info = tcx.coverage_ids_info(instance.def);
+
+    let expressions = prepare_expressions(fn_cov_info, ids_info, is_used);
+
+    let mut covfun = CovfunRecord {
+        mangled_function_name: tcx.symbol_name(instance).name,
+        source_hash: if is_used { fn_cov_info.function_source_hash } else { 0 },
+        is_used,
+        virtual_file_mapping: VirtualFileMapping::default(),
+        expressions,
+        regions: ffi::Regions::default(),
+    };
+
+    fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun);
+
+    if covfun.regions.has_no_regions() {
+        debug!(?covfun, "function has no mappings to embed; skipping");
+        return None;
+    }
+
+    Some(covfun)
+}
+
+/// Convert the function's coverage-counter expressions into a form suitable for FFI.
+fn prepare_expressions(
+    fn_cov_info: &FunctionCoverageInfo,
+    ids_info: &CoverageIdsInfo,
+    is_used: bool,
+) -> Vec<ffi::CounterExpression> {
+    // If any counters or expressions were removed by MIR opts, replace their
+    // terms with zero.
+    let counter_for_term = |term| {
+        if !is_used || ids_info.is_zero_term(term) {
+            ffi::Counter::ZERO
+        } else {
+            ffi::Counter::from_term(term)
+        }
+    };
+
+    // We know that LLVM will optimize out any unused expressions before
+    // producing the final coverage map, so there's no need to do the same
+    // thing on the Rust side unless we're confident we can do much better.
+    // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.)
+    fn_cov_info
+        .expressions
+        .iter()
+        .map(move |&Expression { lhs, op, rhs }| ffi::CounterExpression {
+            lhs: counter_for_term(lhs),
+            kind: match op {
+                Op::Add => ffi::ExprKind::Add,
+                Op::Subtract => ffi::ExprKind::Subtract,
+            },
+            rhs: counter_for_term(rhs),
+        })
+        .collect::<Vec<_>>()
+}
+
+/// Populates the mapping region tables in the current function's covfun record.
+fn fill_region_tables<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    global_file_table: &mut GlobalFileTable,
+    fn_cov_info: &'tcx FunctionCoverageInfo,
+    ids_info: &'tcx CoverageIdsInfo,
+    covfun: &mut CovfunRecord<'tcx>,
+) {
+    // Currently a function's mappings must all be in the same file as its body span.
+    let source_map = tcx.sess.source_map();
+    let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo());
+
+    // Look up the global file ID for that file.
+    let global_file_id = global_file_table.global_file_id_for_file(&source_file);
+
+    // Associate that global file ID with a local file ID for this function.
+    let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
+
+    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
+        &mut covfun.regions;
+
+    let make_cov_span = |span: Span| {
+        spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span)
+    };
+    let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen();
+
+    // For each counter/region pair in this function+file, convert it to a
+    // form suitable for FFI.
+    let is_zero_term = |term| !covfun.is_used || ids_info.is_zero_term(term);
+    for &Mapping { ref kind, span } in &fn_cov_info.mappings {
+        // If the mapping refers to counters/expressions that were removed by
+        // MIR opts, replace those occurrences with zero.
+        let kind = kind.map_terms(|term| if is_zero_term(term) { CovTerm::Zero } else { term });
+
+        // Convert the `Span` into coordinates that we can pass to LLVM, or
+        // discard the span if conversion fails. In rare, cases _all_ of a
+        // function's spans are discarded, and the rest of coverage codegen
+        // needs to handle that gracefully to avoid a repeat of #133606.
+        // We don't have a good test case for triggering that organically, so
+        // instead we set `-Zcoverage-options=discard-all-spans-in-codegen`
+        // to force it to occur.
+        let Some(cov_span) = make_cov_span(span) else { continue };
+        if discard_all {
+            continue;
+        }
+
+        match kind {
+            MappingKind::Code(term) => {
+                code_regions
+                    .push(ffi::CodeRegion { cov_span, counter: ffi::Counter::from_term(term) });
+            }
+            MappingKind::Branch { true_term, false_term } => {
+                branch_regions.push(ffi::BranchRegion {
+                    cov_span,
+                    true_counter: ffi::Counter::from_term(true_term),
+                    false_counter: ffi::Counter::from_term(false_term),
+                });
+            }
+            MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => {
+                mcdc_branch_regions.push(ffi::MCDCBranchRegion {
+                    cov_span,
+                    true_counter: ffi::Counter::from_term(true_term),
+                    false_counter: ffi::Counter::from_term(false_term),
+                    mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params),
+                });
+            }
+            MappingKind::MCDCDecision(mcdc_decision_params) => {
+                mcdc_decision_regions.push(ffi::MCDCDecisionRegion {
+                    cov_span,
+                    mcdc_decision_params: ffi::mcdc::DecisionParameters::from(mcdc_decision_params),
+                });
+            }
+        }
+    }
+}
+
+/// Generates the contents of the covfun record for this function, which
+/// contains the function's coverage mapping data. The record is then stored
+/// as a global variable in the `__llvm_covfun` section.
+pub(crate) fn generate_covfun_record<'tcx>(
+    cx: &CodegenCx<'_, 'tcx>,
+    filenames_hash: u64,
+    covfun: &CovfunRecord<'tcx>,
+) {
+    let &CovfunRecord {
+        mangled_function_name,
+        source_hash,
+        is_used,
+        ref virtual_file_mapping,
+        ref expressions,
+        ref regions,
+    } = covfun;
+
+    // Encode the function's coverage mappings into a buffer.
+    let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer(
+        &virtual_file_mapping.to_vec(),
+        expressions,
+        regions,
+    );
+
+    // A covfun record consists of four target-endian integers, followed by the
+    // encoded mapping data in bytes. Note that the length field is 32 bits.
+    // <https://llvm.org/docs/CoverageMappingFormat.html#llvm-ir-representation>
+    // See also `src/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp` and
+    // `COVMAP_V3` in `src/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc`.
+    let func_name_hash = llvm_cov::hash_bytes(mangled_function_name.as_bytes());
+    let covfun_record = cx.const_struct(
+        &[
+            cx.const_u64(func_name_hash),
+            cx.const_u32(coverage_mapping_buffer.len() as u32),
+            cx.const_u64(source_hash),
+            cx.const_u64(filenames_hash),
+            cx.const_bytes(&coverage_mapping_buffer),
+        ],
+        // This struct needs to be packed, so that the 32-bit length field
+        // doesn't have unexpected padding.
+        true,
+    );
+
+    // Choose a variable name to hold this function's covfun data.
+    // Functions that are used have a suffix ("u") to distinguish them from
+    // unused copies of the same function (from different CGUs), so that if a
+    // linker sees both it won't discard the used copy's data.
+    let u = if is_used { "u" } else { "" };
+    let covfun_var_name = CString::new(format!("__covrec_{func_name_hash:X}{u}")).unwrap();
+    debug!("function record var name: {covfun_var_name:?}");
+
+    let covfun_global = llvm::add_global(cx.llmod, cx.val_ty(covfun_record), &covfun_var_name);
+    llvm::set_initializer(covfun_global, covfun_record);
+    llvm::set_global_constant(covfun_global, true);
+    llvm::set_linkage(covfun_global, llvm::Linkage::LinkOnceODRLinkage);
+    llvm::set_visibility(covfun_global, llvm::Visibility::Hidden);
+    llvm::set_section(covfun_global, cx.covfun_section_name());
+    // LLVM's coverage mapping format specifies 8-byte alignment for items in this section.
+    // <https://llvm.org/docs/CoverageMappingFormat.html>
+    llvm::set_alignment(covfun_global, Align::EIGHT);
+    if cx.target_spec().supports_comdat() {
+        llvm::set_comdat(cx.llmod, covfun_global, &covfun_var_name);
+    }
+
+    cx.add_used_global(covfun_global);
+}
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
new file mode 100644
index 00000000000..6d1d91340c2
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
@@ -0,0 +1,126 @@
+use rustc_middle::mir::coverage::FunctionCoverageInfo;
+use rustc_span::source_map::SourceMap;
+use rustc_span::{BytePos, Pos, SourceFile, Span};
+use tracing::debug;
+
+use crate::coverageinfo::ffi;
+use crate::coverageinfo::mapgen::LocalFileId;
+
+/// Converts the span into its start line and column, and end line and column.
+///
+/// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
+/// the compiler, these column numbers are denoted in **bytes**, because that's what
+/// LLVM's `llvm-cov` tool expects to see in coverage maps.
+///
+/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
+/// but it's hard to rule out entirely (especially in the presence of complex macros
+/// or other expansions), and if it does happen then skipping a span or function is
+/// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
+pub(crate) fn make_coverage_span(
+    file_id: LocalFileId,
+    source_map: &SourceMap,
+    fn_cov_info: &FunctionCoverageInfo,
+    file: &SourceFile,
+    span: Span,
+) -> Option<ffi::CoverageSpan> {
+    let span = ensure_non_empty_span(source_map, fn_cov_info, span)?;
+
+    let lo = span.lo();
+    let hi = span.hi();
+
+    // Column numbers need to be in bytes, so we can't use the more convenient
+    // `SourceMap` methods for looking up file coordinates.
+    let line_and_byte_column = |pos: BytePos| -> Option<(usize, usize)> {
+        let rpos = file.relative_position(pos);
+        let line_index = file.lookup_line(rpos)?;
+        let line_start = file.lines()[line_index];
+        // Line numbers and column numbers are 1-based, so add 1 to each.
+        Some((line_index + 1, (rpos - line_start).to_usize() + 1))
+    };
+
+    let (mut start_line, start_col) = line_and_byte_column(lo)?;
+    let (mut end_line, end_col) = line_and_byte_column(hi)?;
+
+    // Apply an offset so that code in doctests has correct line numbers.
+    // FIXME(#79417): Currently we have no way to offset doctest _columns_.
+    start_line = source_map.doctest_offset_line(&file.name, start_line);
+    end_line = source_map.doctest_offset_line(&file.name, end_line);
+
+    check_coverage_span(ffi::CoverageSpan {
+        file_id: file_id.as_u32(),
+        start_line: start_line as u32,
+        start_col: start_col as u32,
+        end_line: end_line as u32,
+        end_col: end_col as u32,
+    })
+}
+
+fn ensure_non_empty_span(
+    source_map: &SourceMap,
+    fn_cov_info: &FunctionCoverageInfo,
+    span: Span,
+) -> Option<Span> {
+    if !span.is_empty() {
+        return Some(span);
+    }
+
+    let lo = span.lo();
+    let hi = span.hi();
+
+    // The span is empty, so try to expand it to cover an adjacent '{' or '}',
+    // but only within the bounds of the body span.
+    let try_next = hi < fn_cov_info.body_span.hi();
+    let try_prev = fn_cov_info.body_span.lo() < lo;
+    if !(try_next || try_prev) {
+        return None;
+    }
+
+    source_map
+        .span_to_source(span, |src, start, end| try {
+            // Adjusting span endpoints by `BytePos(1)` is normally a bug,
+            // but in this case we have specifically checked that the character
+            // we're skipping over is one of two specific ASCII characters, so
+            // adjusting by exactly 1 byte is correct.
+            if try_next && src.as_bytes()[end] == b'{' {
+                Some(span.with_hi(hi + BytePos(1)))
+            } else if try_prev && src.as_bytes()[start - 1] == b'}' {
+                Some(span.with_lo(lo - BytePos(1)))
+            } else {
+                None
+            }
+        })
+        .ok()?
+}
+
+/// If `llvm-cov` sees a source region that is improperly ordered (end < start),
+/// it will immediately exit with a fatal error. To prevent that from happening,
+/// discard regions that are improperly ordered, or might be interpreted in a
+/// way that makes them improperly ordered.
+fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> {
+    let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span;
+
+    // Line/column coordinates are supposed to be 1-based. If we ever emit
+    // coordinates of 0, `llvm-cov` might misinterpret them.
+    let all_nonzero = [start_line, start_col, end_line, end_col].into_iter().all(|x| x != 0);
+    // Coverage mappings use the high bit of `end_col` to indicate that a
+    // region is actually a "gap" region, so make sure it's unset.
+    let end_col_has_high_bit_unset = (end_col & (1 << 31)) == 0;
+    // If a region is improperly ordered (end < start), `llvm-cov` will exit
+    // with a fatal error, which is inconvenient for users and hard to debug.
+    let is_ordered = (start_line, start_col) <= (end_line, end_col);
+
+    if all_nonzero && end_col_has_high_bit_unset && is_ordered {
+        Some(cov_span)
+    } else {
+        debug!(
+            ?cov_span,
+            ?all_nonzero,
+            ?end_col_has_high_bit_unset,
+            ?is_ordered,
+            "Skipping source region that would be misinterpreted or rejected by LLVM"
+        );
+        // If this happens in a debug build, ICE to make it easier to notice.
+        debug_assert!(false, "Improper source region: {cov_span:?}");
+        None
+    }
+}
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
index c2fcb33f98b..7311cd9d230 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
@@ -5,7 +5,7 @@ use rustc_abi::Size;
 use rustc_codegen_ssa::traits::{
     BuilderMethods, ConstCodegenMethods, CoverageInfoBuilderMethods, MiscCodegenMethods,
 };
-use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
+use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
 use rustc_middle::mir::coverage::CoverageKind;
 use rustc_middle::ty::Instance;
 use rustc_middle::ty::layout::HasTyCtxt;
@@ -13,19 +13,16 @@ use tracing::{debug, instrument};
 
 use crate::builder::Builder;
 use crate::common::CodegenCx;
-use crate::coverageinfo::map_data::FunctionCoverageCollector;
 use crate::llvm;
 
 pub(crate) mod ffi;
 mod llvm_cov;
-pub(crate) mod map_data;
 mod mapgen;
 
 /// Extra per-CGU context/state needed for coverage instrumentation.
 pub(crate) struct CguCoverageContext<'ll, 'tcx> {
     /// Coverage data for each instrumented function identified by DefId.
-    pub(crate) function_coverage_map:
-        RefCell<FxIndexMap<Instance<'tcx>, FunctionCoverageCollector<'tcx>>>,
+    pub(crate) instances_used: RefCell<FxIndexSet<Instance<'tcx>>>,
     pub(crate) pgo_func_name_var_map: RefCell<FxHashMap<Instance<'tcx>, &'ll llvm::Value>>,
     pub(crate) mcdc_condition_bitmap_map: RefCell<FxHashMap<Instance<'tcx>, Vec<&'ll llvm::Value>>>,
 
@@ -35,19 +32,13 @@ pub(crate) struct CguCoverageContext<'ll, 'tcx> {
 impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> {
     pub(crate) fn new() -> Self {
         Self {
-            function_coverage_map: Default::default(),
+            instances_used: RefCell::<FxIndexSet<_>>::default(),
             pgo_func_name_var_map: Default::default(),
             mcdc_condition_bitmap_map: Default::default(),
             covfun_section_name: Default::default(),
         }
     }
 
-    fn take_function_coverage_map(
-        &self,
-    ) -> FxIndexMap<Instance<'tcx>, FunctionCoverageCollector<'tcx>> {
-        self.function_coverage_map.replace(FxIndexMap::default())
-    }
-
     /// LLVM use a temp value to record evaluated mcdc test vector of each decision, which is
     /// called condition bitmap. In order to handle nested decisions, several condition bitmaps can
     /// be allocated for a function body. These values are named `mcdc.addr.{i}` and are a 32-bit
@@ -160,13 +151,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
         // Mark the instance as used in this CGU, for coverage purposes.
         // This includes functions that were not partitioned into this CGU,
         // but were MIR-inlined into one of this CGU's functions.
-        coverage_cx.function_coverage_map.borrow_mut().entry(instance).or_insert_with(|| {
-            FunctionCoverageCollector::new(
-                instance,
-                function_coverage_info,
-                bx.tcx.coverage_ids_info(instance.def),
-            )
-        });
+        coverage_cx.instances_used.borrow_mut().insert(instance);
 
         match *kind {
             CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!(
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
index aef8642f199..2c9f1cda13a 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
@@ -7,7 +7,7 @@ use rustc_hir::def_id::LOCAL_CRATE;
 use rustc_middle::bug;
 use rustc_middle::middle::debugger_visualizer::DebuggerVisualizerType;
 use rustc_session::config::{CrateType, DebugInfo};
-use rustc_span::symbol::sym;
+use rustc_span::sym;
 
 use crate::builder::Builder;
 use crate::common::CodegenCx;
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index dee67baaee9..40248a9009a 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -16,8 +16,7 @@ use rustc_middle::ty::{
     self, AdtKind, CoroutineArgsExt, Instance, PolyExistentialTraitRef, Ty, TyCtxt, Visibility,
 };
 use rustc_session::config::{self, DebugInfo, Lto};
-use rustc_span::symbol::Symbol;
-use rustc_span::{DUMMY_SP, FileName, FileNameDisplayPreference, SourceFile, hygiene};
+use rustc_span::{DUMMY_SP, FileName, FileNameDisplayPreference, SourceFile, Symbol, hygiene};
 use rustc_symbol_mangling::typeid_for_trait_ref;
 use rustc_target::spec::DebuginfoKind;
 use smallvec::smallvec;
@@ -471,8 +470,6 @@ pub(crate) fn type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, t: Ty<'tcx>) ->
             AdtKind::Enum => enums::build_enum_type_di_node(cx, unique_type_id),
         },
         ty::Tuple(_) => build_tuple_type_di_node(cx, unique_type_id),
-        // Type parameters from polymorphized functions.
-        ty::Param(_) => build_param_type_di_node(cx, t),
         _ => bug!("debuginfo: unexpected type in type_di_node(): {:?}", t),
     };
 
@@ -871,26 +868,6 @@ fn build_foreign_type_di_node<'ll, 'tcx>(
     )
 }
 
-fn build_param_type_di_node<'ll, 'tcx>(
-    cx: &CodegenCx<'ll, 'tcx>,
-    t: Ty<'tcx>,
-) -> DINodeCreationResult<'ll> {
-    debug!("build_param_type_di_node: {:?}", t);
-    let name = format!("{t:?}");
-    DINodeCreationResult {
-        di_node: unsafe {
-            llvm::LLVMRustDIBuilderCreateBasicType(
-                DIB(cx),
-                name.as_c_char_ptr(),
-                name.len(),
-                Size::ZERO.bits(),
-                DW_ATE_unsigned,
-            )
-        },
-        already_stored_in_typemap: false,
-    }
-}
-
 pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
     tcx: TyCtxt<'tcx>,
     codegen_unit_name: &str,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
index d374767f187..23e11748e52 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
@@ -212,21 +212,17 @@ pub(super) fn build_enum_type_di_node<'ll, 'tcx>(
         ),
         |cx, enum_type_di_node| {
             match enum_type_and_layout.variants {
-                Variants::Single { index: variant_index } => {
-                    if enum_adt_def.variants().is_empty() {
-                        // Uninhabited enums have Variants::Single. We don't generate
-                        // any members for them.
-                        return smallvec![];
-                    }
-
-                    build_single_variant_union_fields(
-                        cx,
-                        enum_adt_def,
-                        enum_type_and_layout,
-                        enum_type_di_node,
-                        variant_index,
-                    )
+                Variants::Empty => {
+                    // We don't generate any members for uninhabited types.
+                    return smallvec![];
                 }
+                Variants::Single { index: variant_index } => build_single_variant_union_fields(
+                    cx,
+                    enum_adt_def,
+                    enum_type_and_layout,
+                    enum_type_di_node,
+                    variant_index,
+                ),
                 Variants::Multiple {
                     tag_encoding: TagEncoding::Direct,
                     ref variants,
@@ -303,6 +299,7 @@ pub(super) fn build_coroutine_di_node<'ll, 'tcx>(
                 )
             }
             Variants::Single { .. }
+            | Variants::Empty
             | Variants::Multiple { tag_encoding: TagEncoding::Niche { .. }, .. } => {
                 bug!(
                     "Encountered coroutine with non-direct-tag layout: {:?}",
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
index 65ab22ad89e..9f6a5cc89e0 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
@@ -392,7 +392,7 @@ fn compute_discriminant_value<'ll, 'tcx>(
     variant_index: VariantIdx,
 ) -> DiscrResult {
     match enum_type_and_layout.layout.variants() {
-        &Variants::Single { .. } => DiscrResult::NoDiscriminant,
+        &Variants::Single { .. } | &Variants::Empty => DiscrResult::NoDiscriminant,
         &Variants::Multiple { tag_encoding: TagEncoding::Direct, .. } => DiscrResult::Value(
             enum_type_and_layout.ty.discriminant_for_variant(cx.tcx, variant_index).unwrap().val,
         ),
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
index 241bf167a81..11824398f24 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
@@ -358,8 +358,8 @@ fn build_discr_member_di_node<'ll, 'tcx>(
     let containing_scope = enum_or_coroutine_type_di_node;
 
     match enum_or_coroutine_type_and_layout.layout.variants() {
-        // A single-variant enum has no discriminant.
-        &Variants::Single { .. } => None,
+        // A single-variant or no-variant enum has no discriminant.
+        &Variants::Single { .. } | &Variants::Empty => None,
 
         &Variants::Multiple { tag_field, .. } => {
             let tag_base_type = tag_base_type(cx.tcx, enum_or_coroutine_type_and_layout);
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index a8fdfbed592..fae698bea2a 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -19,9 +19,8 @@ use rustc_middle::ty::layout::{HasTypingEnv, LayoutOf};
 use rustc_middle::ty::{self, GenericArgsRef, Instance, Ty, TypeVisitableExt};
 use rustc_session::Session;
 use rustc_session::config::{self, DebugInfo};
-use rustc_span::symbol::Symbol;
 use rustc_span::{
-    BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId,
+    BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId, Symbol,
 };
 use smallvec::SmallVec;
 use tracing::debug;
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index d338c848754..3ec386f6b07 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -32,7 +32,7 @@ use crate::{attributes, llvm};
 ///
 /// If there’s a value with the same name already declared, the function will
 /// update the declaration and return existing Value instead.
-fn declare_raw_fn<'ll>(
+pub(crate) fn declare_raw_fn<'ll>(
     cx: &CodegenCx<'ll, '_>,
     name: &str,
     callconv: llvm::CallConv,
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index 3cdb5b971d9..f340b06e876 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -90,6 +90,11 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> {
 }
 
 #[derive(Diagnostic)]
+#[diag(codegen_llvm_autodiff_without_lto)]
+#[note]
+pub(crate) struct AutoDiffWithoutLTO;
+
+#[derive(Diagnostic)]
 #[diag(codegen_llvm_lto_disallowed)]
 pub(crate) struct LtoDisallowed;
 
@@ -131,6 +136,8 @@ pub enum LlvmError<'a> {
     PrepareThinLtoModule,
     #[diag(codegen_llvm_parse_bitcode)]
     ParseBitcode,
+    #[diag(codegen_llvm_prepare_autodiff)]
+    PrepareAutoDiff { src: String, target: String, error: String },
 }
 
 pub(crate) struct WithLlvmError<'a>(pub LlvmError<'a>, pub String);
@@ -152,6 +159,7 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for WithLlvmError<'_> {
             }
             PrepareThinLtoModule => fluent::codegen_llvm_prepare_thin_lto_module_with_llvm_err,
             ParseBitcode => fluent::codegen_llvm_parse_bitcode_with_llvm_err,
+            PrepareAutoDiff { .. } => fluent::codegen_llvm_prepare_autodiff_with_llvm_err,
         };
         self.0
             .into_diag(dcx, level)
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index d8b055137b3..cabcfc9b42b 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -340,6 +340,37 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     self.const_i32(cache_type),
                 ])
             }
+            sym::carrying_mul_add => {
+                let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx);
+
+                let wide_llty = self.type_ix(size.bits() * 2);
+                let args = args.as_array().unwrap();
+                let [a, b, c, d] = args.map(|a| self.intcast(a.immediate(), wide_llty, signed));
+
+                let wide = if signed {
+                    let prod = self.unchecked_smul(a, b);
+                    let acc = self.unchecked_sadd(prod, c);
+                    self.unchecked_sadd(acc, d)
+                } else {
+                    let prod = self.unchecked_umul(a, b);
+                    let acc = self.unchecked_uadd(prod, c);
+                    self.unchecked_uadd(acc, d)
+                };
+
+                let narrow_llty = self.type_ix(size.bits());
+                let low = self.trunc(wide, narrow_llty);
+                let bits_const = self.const_uint(wide_llty, size.bits());
+                // No need for ashr when signed; LLVM changes it to lshr anyway.
+                let high = self.lshr(wide, bits_const);
+                // FIXME: could be `trunc nuw`, even for signed.
+                let high = self.trunc(high, narrow_llty);
+
+                let pair_llty = self.type_struct(&[narrow_llty, narrow_llty], false);
+                let pair = self.const_poison(pair_llty);
+                let pair = self.insert_value(pair, low, 0);
+                let pair = self.insert_value(pair, high, 1);
+                pair
+            }
             sym::ctlz
             | sym::ctlz_nonzero
             | sym::cttz
@@ -352,84 +383,84 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             | sym::saturating_add
             | sym::saturating_sub => {
                 let ty = arg_tys[0];
-                match int_type_width_signed(ty, self) {
-                    Some((width, signed)) => match name {
-                        sym::ctlz | sym::cttz => {
-                            let y = self.const_bool(false);
-                            let ret = self.call_intrinsic(&format!("llvm.{name}.i{width}"), &[
-                                args[0].immediate(),
-                                y,
-                            ]);
-
-                            self.intcast(ret, llret_ty, false)
-                        }
-                        sym::ctlz_nonzero => {
-                            let y = self.const_bool(true);
-                            let llvm_name = &format!("llvm.ctlz.i{width}");
-                            let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
-                            self.intcast(ret, llret_ty, false)
-                        }
-                        sym::cttz_nonzero => {
-                            let y = self.const_bool(true);
-                            let llvm_name = &format!("llvm.cttz.i{width}");
-                            let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
-                            self.intcast(ret, llret_ty, false)
-                        }
-                        sym::ctpop => {
-                            let ret = self.call_intrinsic(&format!("llvm.ctpop.i{width}"), &[args
-                                [0]
-                            .immediate()]);
-                            self.intcast(ret, llret_ty, false)
-                        }
-                        sym::bswap => {
-                            if width == 8 {
-                                args[0].immediate() // byte swap a u8/i8 is just a no-op
-                            } else {
-                                self.call_intrinsic(&format!("llvm.bswap.i{width}"), &[
-                                    args[0].immediate()
-                                ])
-                            }
-                        }
-                        sym::bitreverse => self
-                            .call_intrinsic(&format!("llvm.bitreverse.i{width}"), &[
+                if !ty.is_integral() {
+                    tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
+                        span,
+                        name,
+                        ty,
+                    });
+                    return Ok(());
+                }
+                let (size, signed) = ty.int_size_and_signed(self.tcx);
+                let width = size.bits();
+                match name {
+                    sym::ctlz | sym::cttz => {
+                        let y = self.const_bool(false);
+                        let ret = self.call_intrinsic(&format!("llvm.{name}.i{width}"), &[
+                            args[0].immediate(),
+                            y,
+                        ]);
+
+                        self.intcast(ret, llret_ty, false)
+                    }
+                    sym::ctlz_nonzero => {
+                        let y = self.const_bool(true);
+                        let llvm_name = &format!("llvm.ctlz.i{width}");
+                        let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
+                        self.intcast(ret, llret_ty, false)
+                    }
+                    sym::cttz_nonzero => {
+                        let y = self.const_bool(true);
+                        let llvm_name = &format!("llvm.cttz.i{width}");
+                        let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
+                        self.intcast(ret, llret_ty, false)
+                    }
+                    sym::ctpop => {
+                        let ret = self.call_intrinsic(&format!("llvm.ctpop.i{width}"), &[
+                            args[0].immediate()
+                        ]);
+                        self.intcast(ret, llret_ty, false)
+                    }
+                    sym::bswap => {
+                        if width == 8 {
+                            args[0].immediate() // byte swap a u8/i8 is just a no-op
+                        } else {
+                            self.call_intrinsic(&format!("llvm.bswap.i{width}"), &[
                                 args[0].immediate()
-                            ]),
-                        sym::rotate_left | sym::rotate_right => {
-                            let is_left = name == sym::rotate_left;
-                            let val = args[0].immediate();
-                            let raw_shift = args[1].immediate();
-                            // rotate = funnel shift with first two args the same
-                            let llvm_name =
-                                &format!("llvm.fsh{}.i{}", if is_left { 'l' } else { 'r' }, width);
-
-                            // llvm expects shift to be the same type as the values, but rust
-                            // always uses `u32`.
-                            let raw_shift = self.intcast(raw_shift, self.val_ty(val), false);
-
-                            self.call_intrinsic(llvm_name, &[val, val, raw_shift])
+                            ])
                         }
-                        sym::saturating_add | sym::saturating_sub => {
-                            let is_add = name == sym::saturating_add;
-                            let lhs = args[0].immediate();
-                            let rhs = args[1].immediate();
-                            let llvm_name = &format!(
-                                "llvm.{}{}.sat.i{}",
-                                if signed { 's' } else { 'u' },
-                                if is_add { "add" } else { "sub" },
-                                width
-                            );
-                            self.call_intrinsic(llvm_name, &[lhs, rhs])
-                        }
-                        _ => bug!(),
-                    },
-                    None => {
-                        tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
-                            span,
-                            name,
-                            ty,
-                        });
-                        return Ok(());
                     }
+                    sym::bitreverse => self
+                        .call_intrinsic(&format!("llvm.bitreverse.i{width}"), &[
+                            args[0].immediate()
+                        ]),
+                    sym::rotate_left | sym::rotate_right => {
+                        let is_left = name == sym::rotate_left;
+                        let val = args[0].immediate();
+                        let raw_shift = args[1].immediate();
+                        // rotate = funnel shift with first two args the same
+                        let llvm_name =
+                            &format!("llvm.fsh{}.i{}", if is_left { 'l' } else { 'r' }, width);
+
+                        // llvm expects shift to be the same type as the values, but rust
+                        // always uses `u32`.
+                        let raw_shift = self.intcast(raw_shift, self.val_ty(val), false);
+
+                        self.call_intrinsic(llvm_name, &[val, val, raw_shift])
+                    }
+                    sym::saturating_add | sym::saturating_sub => {
+                        let is_add = name == sym::saturating_add;
+                        let lhs = args[0].immediate();
+                        let rhs = args[1].immediate();
+                        let llvm_name = &format!(
+                            "llvm.{}{}.sat.i{}",
+                            if signed { 's' } else { 'u' },
+                            if is_add { "add" } else { "sub" },
+                            width
+                        );
+                        self.call_intrinsic(llvm_name, &[lhs, rhs])
+                    }
+                    _ => bug!(),
                 }
             }
 
@@ -2531,19 +2562,3 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     span_bug!(span, "unknown SIMD intrinsic");
 }
-
-// Returns the width of an int Ty, and if it's signed or not
-// Returns None if the type is not an integer
-// FIXME: there’s multiple of this functions, investigate using some of the already existing
-// stuffs.
-fn int_type_width_signed(ty: Ty<'_>, cx: &CodegenCx<'_, '_>) -> Option<(u64, bool)> {
-    match ty.kind() {
-        ty::Int(t) => {
-            Some((t.bit_width().unwrap_or(u64::from(cx.tcx.sess.target.pointer_width)), true))
-        }
-        ty::Uint(t) => {
-            Some((t.bit_width().unwrap_or(u64::from(cx.tcx.sess.target.pointer_width)), false))
-        }
-        _ => None,
-    }
-}
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 3dfb86d422d..06afe8bb3ad 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -17,6 +17,8 @@
 #![feature(iter_intersperse)]
 #![feature(let_chains)]
 #![feature(rustdoc_internals)]
+#![feature(slice_as_array)]
+#![feature(try_blocks)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end
 
@@ -26,9 +28,10 @@ use std::mem::ManuallyDrop;
 
 use back::owned_target_machine::OwnedTargetMachine;
 use back::write::{create_informational_target_machine, create_target_machine};
-use errors::ParseTargetMachineConfig;
-pub use llvm_util::target_features;
+use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig};
+pub use llvm_util::target_features_cfg;
 use rustc_ast::expand::allocator::AllocatorKind;
+use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
 use rustc_codegen_ssa::back::write::{
     CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn,
@@ -36,14 +39,14 @@ use rustc_codegen_ssa::back::write::{
 use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
 use rustc_data_structures::fx::FxIndexMap;
-use rustc_errors::{DiagCtxtHandle, ErrorGuaranteed, FatalError};
+use rustc_errors::{DiagCtxtHandle, FatalError};
 use rustc_metadata::EncodedMetadata;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
 use rustc_middle::ty::TyCtxt;
 use rustc_middle::util::Providers;
 use rustc_session::Session;
-use rustc_session::config::{OptLevel, OutputFilenames, PrintKind, PrintRequest};
-use rustc_span::symbol::Symbol;
+use rustc_session::config::{Lto, OptLevel, OutputFilenames, PrintKind, PrintRequest};
+use rustc_span::Symbol;
 
 mod back {
     pub(crate) mod archive;
@@ -231,6 +234,20 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
         (module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod()))
     }
+    /// Generate autodiff rules
+    fn autodiff(
+        cgcx: &CodegenContext<Self>,
+        tcx: TyCtxt<'_>,
+        module: &ModuleCodegen<Self::Module>,
+        diff_fncs: Vec<AutoDiffItem>,
+        config: &ModuleConfig,
+    ) -> Result<(), FatalError> {
+        if cgcx.lto != Lto::Fat {
+            let dcx = cgcx.create_dcx();
+            return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutLTO));
+        }
+        builder::autodiff::differentiate(module, cgcx, tcx, diff_fncs, config)
+    }
 }
 
 unsafe impl Send for LlvmCodegenBackend {} // Llvm is on a per-thread basis
@@ -330,8 +347,8 @@ impl CodegenBackend for LlvmCodegenBackend {
         llvm_util::print_version();
     }
 
-    fn target_features(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
-        target_features(sess, allow_unstable)
+    fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
+        target_features_cfg(sess, allow_unstable)
     }
 
     fn codegen_crate<'tcx>(
@@ -370,19 +387,14 @@ impl CodegenBackend for LlvmCodegenBackend {
         (codegen_results, work_products)
     }
 
-    fn link(
-        &self,
-        sess: &Session,
-        codegen_results: CodegenResults,
-        outputs: &OutputFilenames,
-    ) -> Result<(), ErrorGuaranteed> {
+    fn link(&self, sess: &Session, codegen_results: CodegenResults, outputs: &OutputFilenames) {
         use rustc_codegen_ssa::back::link::link_binary;
 
         use crate::back::archive::LlvmArchiveBuilderBuilder;
 
         // Run the linker on any artifacts that resulted from the LLVM run.
         // This should produce either a finished executable or library.
-        link_binary(sess, &LlvmArchiveBuilderBuilder, codegen_results, outputs)
+        link_binary(sess, &LlvmArchiveBuilderBuilder, codegen_results, outputs);
     }
 }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
index a4cb5a25d1b..11043b664f5 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
@@ -151,7 +151,7 @@ impl InlineAsmDiagnostic {
             unsafe { SrcMgrDiagnostic::unpack(super::LLVMRustGetSMDiagnostic(di, &mut cookie)) };
         InlineAsmDiagnostic {
             level: smdiag.level,
-            cookie: cookie.into(),
+            cookie,
             message: smdiag.message,
             source: smdiag.source,
         }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
new file mode 100644
index 00000000000..729d6f62e24
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -0,0 +1,29 @@
+#![allow(non_camel_case_types)]
+
+use libc::{c_char, c_uint};
+
+use super::ffi::{BasicBlock, Metadata, Module, Type, Value};
+use crate::llvm::Bool;
+extern "C" {
+    // Enzyme
+    pub fn LLVMRustHasMetadata(I: &Value, KindID: c_uint) -> bool;
+    pub fn LLVMRustEraseInstBefore(BB: &BasicBlock, I: &Value);
+    pub fn LLVMRustGetLastInstruction<'a>(BB: &BasicBlock) -> Option<&'a Value>;
+    pub fn LLVMRustDIGetInstMetadata(I: &Value) -> Option<&Metadata>;
+    pub fn LLVMRustEraseInstFromParent(V: &Value);
+    pub fn LLVMRustGetTerminator<'a>(B: &BasicBlock) -> &'a Value;
+    pub fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
+
+    pub fn LLVMGetFunctionCallConv(F: &Value) -> c_uint;
+    pub fn LLVMGetReturnType(T: &Type) -> &Type;
+    pub fn LLVMGetParams(Fnc: &Value, parms: *mut &Value);
+    pub fn LLVMGetNamedFunction(M: &Module, Name: *const c_char) -> Option<&Value>;
+}
+
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq)]
+pub enum LLVMRustVerifierFailureAction {
+    LLVMAbortProcessAction = 0,
+    LLVMPrintMessageAction = 1,
+    LLVMReturnStatusAction = 2,
+}
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index b1ace0033ba..472d4a3a72b 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -17,7 +17,9 @@ use super::debuginfo::{
     DebugEmissionKind, DebugNameTableKind,
 };
 
-pub type Bool = c_uint;
+/// In the LLVM-C API, boolean values are passed as `typedef int LLVMBool`,
+/// which has a different ABI from Rust or C++ `bool`.
+pub type Bool = c_int;
 
 pub const True: Bool = 1 as Bool;
 pub const False: Bool = 0 as Bool;
@@ -54,25 +56,6 @@ pub enum LLVMRustResult {
     Failure,
 }
 
-// Rust version of the C struct with the same name in rustc_llvm/llvm-wrapper/RustWrapper.cpp.
-#[repr(C)]
-pub struct LLVMRustCOFFShortExport {
-    pub name: *const c_char,
-    pub ordinal_present: bool,
-    /// value of `ordinal` only important when `ordinal_present` is true
-    pub ordinal: u16,
-}
-
-impl LLVMRustCOFFShortExport {
-    pub fn new(name: *const c_char, ordinal: Option<u16>) -> LLVMRustCOFFShortExport {
-        LLVMRustCOFFShortExport {
-            name,
-            ordinal_present: ordinal.is_some(),
-            ordinal: ordinal.unwrap_or(0),
-        }
-    }
-}
-
 /// Translation of LLVM's MachineTypes enum, defined in llvm\include\llvm\BinaryFormat\COFF.h.
 ///
 /// We include only architectures supported on Windows.
@@ -116,7 +99,7 @@ pub enum ModuleFlagMergeBehavior {
 /// LLVM CallingConv::ID. Should we wrap this?
 ///
 /// See <https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/IR/CallingConv.h>
-#[derive(Copy, Clone, PartialEq, Debug)]
+#[derive(Copy, Clone, PartialEq, Debug, TryFromU32)]
 #[repr(C)]
 pub enum CallConv {
     CCallConv = 0,
@@ -543,7 +526,7 @@ pub struct SanitizerOptions {
     pub sanitize_kernel_address_recover: bool,
 }
 
-/// LLVMRelocMode
+/// LLVMRustRelocModel
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
 pub enum RelocModel {
@@ -555,6 +538,15 @@ pub enum RelocModel {
     ROPI_RWPI,
 }
 
+/// LLVMRustFloatABI
+#[derive(Copy, Clone, PartialEq)]
+#[repr(C)]
+pub enum FloatAbi {
+    Default,
+    Soft,
+    Hard,
+}
+
 /// LLVMRustCodeModel
 #[derive(Copy, Clone)]
 #[repr(C)]
@@ -2209,7 +2201,7 @@ unsafe extern "C" {
         Model: CodeModel,
         Reloc: RelocModel,
         Level: CodeGenOptLevel,
-        UseSoftFP: bool,
+        FloatABIType: FloatAbi,
         FunctionSections: bool,
         DataSections: bool,
         UniqueSectionNames: bool,
@@ -2317,7 +2309,7 @@ unsafe extern "C" {
 
     pub fn LLVMRustGetSMDiagnostic<'a>(
         DI: &'a DiagnosticInfo,
-        cookie_out: &mut c_uint,
+        cookie_out: &mut u64,
     ) -> &'a SMDiagnostic;
 
     pub fn LLVMRustUnpackSMDiagnostic(
@@ -2345,15 +2337,6 @@ unsafe extern "C" {
     ) -> &'a mut RustArchiveMember<'a>;
     pub fn LLVMRustArchiveMemberFree<'a>(Member: &'a mut RustArchiveMember<'a>);
 
-    pub fn LLVMRustWriteImportLibrary(
-        ImportName: *const c_char,
-        Path: *const c_char,
-        Exports: *const LLVMRustCOFFShortExport,
-        NumExports: usize,
-        Machine: u16,
-        MinGW: bool,
-    ) -> LLVMRustResult;
-
     pub fn LLVMRustSetDataLayoutFromTargetMachine<'a>(M: &'a Module, TM: &'a TargetMachine);
 
     pub fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock);
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index 909afe35a17..2592a7df95c 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -22,8 +22,11 @@ use crate::common::AsCCharPtr;
 
 pub mod archive_ro;
 pub mod diagnostic;
+pub mod enzyme_ffi;
 mod ffi;
 
+pub use self::enzyme_ffi::*;
+
 impl LLVMRustResult {
     pub fn into_result(self) -> Result<(), ()> {
         match self {
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index db2b03d9aed..628c0b1c29c 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -15,9 +15,9 @@ use rustc_fs_util::path_to_c_string;
 use rustc_middle::bug;
 use rustc_session::Session;
 use rustc_session::config::{PrintKind, PrintRequest};
-use rustc_span::symbol::Symbol;
+use rustc_span::Symbol;
 use rustc_target::spec::{MergeFunctions, PanicStrategy, SmallDataThresholdSupport};
-use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATURES, Stability};
+use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATURES};
 
 use crate::back::write::create_informational_target_machine;
 use crate::errors::{
@@ -230,6 +230,8 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         "aarch64"
     } else if sess.target.arch == "sparc64" {
         "sparc"
+    } else if sess.target.arch == "powerpc64" {
+        "powerpc"
     } else {
         &*sess.target.arch
     };
@@ -289,6 +291,7 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         // https://github.com/llvm/llvm-project/blob/llvmorg-18.1.0/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp#L26
         ("sparc", "v8plus") if get_version().0 == 19 => Some(LLVMFeature::new("v9")),
         ("sparc", "v8plus") if get_version().0 < 19 => None,
+        ("powerpc", "power8-crypto") => Some(LLVMFeature::new("crypto")),
         (_, s) => Some(LLVMFeature::new(s)),
     }
 }
@@ -297,7 +300,7 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
 /// Must express features in the way Rust understands them.
 ///
 /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
-pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
+pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
     let mut features: FxHashSet<Symbol> = Default::default();
 
     // Add base features for the target.
@@ -313,7 +316,7 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
         sess.target
             .rust_target_features()
             .iter()
-            .filter(|(_, gate, _)| gate.is_supported())
+            .filter(|(_, gate, _)| gate.in_cfg())
             .filter(|(feature, _, _)| {
                 // skip checking special features, as LLVM may not understand them
                 if RUSTC_SPECIAL_FEATURES.contains(feature) {
@@ -369,10 +372,10 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
     sess.target
         .rust_target_features()
         .iter()
-        .filter(|(_, gate, _)| gate.is_supported())
-        .filter_map(|&(feature, gate, _)| {
-            if sess.is_nightly_build() || allow_unstable || gate.is_stable() {
-                Some(feature)
+        .filter(|(_, gate, _)| gate.in_cfg())
+        .filter_map(|(feature, gate, _)| {
+            if sess.is_nightly_build() || allow_unstable || gate.requires_nightly().is_none() {
+                Some(*feature)
             } else {
                 None
             }
@@ -490,7 +493,7 @@ fn print_target_features(sess: &Session, tm: &llvm::TargetMachine, out: &mut Str
         .rust_target_features()
         .iter()
         .filter_map(|(feature, gate, _implied)| {
-            if !gate.is_supported() {
+            if !gate.in_cfg() {
                 // Only list (experimentally) supported features.
                 return None;
             }
@@ -713,13 +716,17 @@ pub(crate) fn global_llvm_features(
                             };
                             sess.dcx().emit_warn(unknown_feature);
                         }
-                        Some((_, Stability::Stable, _)) => {}
-                        Some((_, Stability::Unstable(_), _)) => {
-                            // An unstable feature. Warn about using it.
-                            sess.dcx().emit_warn(UnstableCTargetFeature { feature });
-                        }
-                        Some((_, Stability::Forbidden { reason }, _)) => {
-                            sess.dcx().emit_warn(ForbiddenCTargetFeature { feature, reason });
+                        Some((_, stability, _)) => {
+                            if let Err(reason) =
+                                stability.toggle_allowed(&sess.target, enable_disable == '+')
+                            {
+                                sess.dcx().emit_warn(ForbiddenCTargetFeature { feature, reason });
+                            } else if stability.requires_nightly().is_some() {
+                                // An unstable feature. Warn about using it. It makes little sense
+                                // to hard-error here since we just warn about fully unknown
+                                // features above.
+                                sess.dcx().emit_warn(UnstableCTargetFeature { feature });
+                            }
                         }
                     }
 
diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs
index 2b05e24a7ba..b0b6da869da 100644
--- a/compiler/rustc_codegen_llvm/src/type_of.rs
+++ b/compiler/rustc_codegen_llvm/src/type_of.rs
@@ -38,7 +38,7 @@ fn uncached_llvm_type<'a, 'tcx>(
             if let (&ty::Adt(def, _), &Variants::Single { index }) =
                 (layout.ty.kind(), &layout.variants)
             {
-                if def.is_enum() && !def.variants().is_empty() {
+                if def.is_enum() {
                     write!(&mut name, "::{}", def.variant(index).name).unwrap();
                 }
             }