27 files changed, 998 insertions, 303 deletions
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index f84cafb0c4f..95e0481b035 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -37,7 +37,9 @@ fn inline_attr<'ll>(cx: &CodegenCx<'ll, '_>, inline: InlineAttr) -> Option<&'ll
     }
     match inline {
         InlineAttr::Hint => Some(AttributeKind::InlineHint.create_attr(cx.llcx)),
-        InlineAttr::Always => Some(AttributeKind::AlwaysInline.create_attr(cx.llcx)),
+        InlineAttr::Always | InlineAttr::Force { .. } => {
+            Some(AttributeKind::AlwaysInline.create_attr(cx.llcx))
+        }
         InlineAttr::Never => {
             if cx.sess().target.arch != "amdgpu" {
                 Some(AttributeKind::NoInline.create_attr(cx.llcx))
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index f6d2ec24da6..08b774f8d6e 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -604,7 +604,14 @@ pub(crate) fn run_pass_manager(
     debug!("running the pass manager");
     let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
     let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
-    unsafe { write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) }?;
+
+    // If this rustc version was build with enzyme/autodiff enabled, and if users applied the
+    // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time.
+    let first_run = true;
+    debug!("running llvm pm opt pipeline");
+    unsafe {
+        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?;
+    }
     debug!("lto done");
     Ok(())
 }
@@ -730,11 +737,7 @@ pub(crate) unsafe fn optimize_thin_module(
         {
             let _timer =
                 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
-            if unsafe {
-                !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target)
-            } {
-                return Err(write::llvm_err(dcx, LlvmError::PrepareThinLtoModule));
-            }
+            unsafe { llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) };
             save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
         }
 
diff --git a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
index 44c30d22a9e..4cbd49aa44d 100644
--- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
+++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
@@ -25,7 +25,7 @@ impl OwnedTargetMachine {
         model: llvm::CodeModel,
         reloc: llvm::RelocModel,
         level: llvm::CodeGenOptLevel,
-        use_soft_fp: bool,
+        float_abi: llvm::FloatAbi,
         function_sections: bool,
         data_sections: bool,
         unique_section_names: bool,
@@ -57,7 +57,7 @@ impl OwnedTargetMachine {
                 model,
                 reloc,
                 level,
-                use_soft_fp,
+                float_abi,
                 function_sections,
                 data_sections,
                 unique_section_names,
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 45294ea35b1..509b24dd703 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -26,8 +26,8 @@ use rustc_session::config::{
     self, Lto, OutputType, Passes, RemapPathScopeComponents, SplitDwarfKind, SwitchWithOptPath,
 };
 use rustc_span::{BytePos, InnerSpan, Pos, SpanData, SyntaxContext, sym};
-use rustc_target::spec::{CodeModel, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
-use tracing::debug;
+use rustc_target::spec::{CodeModel, FloatAbi, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
+use tracing::{debug, trace};
 
 use crate::back::lto::ThinBuffer;
 use crate::back::owned_target_machine::OwnedTargetMachine;
@@ -181,6 +181,14 @@ pub(crate) fn to_llvm_code_model(code_model: Option<CodeModel>) -> llvm::CodeMod
     }
 }
 
+fn to_llvm_float_abi(float_abi: Option<FloatAbi>) -> llvm::FloatAbi {
+    match float_abi {
+        None => llvm::FloatAbi::Default,
+        Some(FloatAbi::Soft) => llvm::FloatAbi::Soft,
+        Some(FloatAbi::Hard) => llvm::FloatAbi::Hard,
+    }
+}
+
 pub(crate) fn target_machine_factory(
     sess: &Session,
     optlvl: config::OptLevel,
@@ -189,12 +197,12 @@ pub(crate) fn target_machine_factory(
     let reloc_model = to_llvm_relocation_model(sess.relocation_model());
 
     let (opt_level, _) = to_llvm_opt_settings(optlvl);
-    let use_softfp = if sess.target.arch == "arm" && sess.target.abi == "eabihf" {
-        sess.opts.cg.soft_float
+    let float_abi = if sess.target.arch == "arm" && sess.opts.cg.soft_float {
+        llvm::FloatAbi::Soft
     } else {
         // `validate_commandline_args_with_session_available` has already warned about this being
         // ignored. Let's make sure LLVM doesn't suddenly start using this flag on more targets.
-        false
+        to_llvm_float_abi(sess.target.llvm_floatabi)
     };
 
     let ffunction_sections =
@@ -290,7 +298,7 @@ pub(crate) fn target_machine_factory(
             code_model,
             reloc_model,
             opt_level,
-            use_softfp,
+            float_abi,
             ffunction_sections,
             fdata_sections,
             funique_section_names,
@@ -529,9 +537,35 @@ pub(crate) unsafe fn llvm_optimize(
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
+    skip_size_increasing_opts: bool,
 ) -> Result<(), FatalError> {
-    let unroll_loops =
-        opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+    // Enzyme:
+    // The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized
+    // source code. However, benchmarks show that optimizations increasing the code size
+    // tend to reduce AD performance. Therefore deactivate them before AD, then differentiate the code
+    // and finally re-optimize the module, now with all optimizations available.
+    // FIXME(ZuseZ4): In a future update we could figure out how to only optimize individual functions getting
+    // differentiated.
+
+    let unroll_loops;
+    let vectorize_slp;
+    let vectorize_loop;
+
+    // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
+    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
+    // we should make this more granular, or at least check that the user has at least one autodiff
+    // call in their code, to justify altering the compilation pipeline.
+    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+        unroll_loops = false;
+        vectorize_slp = false;
+        vectorize_loop = false;
+    } else {
+        unroll_loops =
+            opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+        vectorize_slp = config.vectorize_slp;
+        vectorize_loop = config.vectorize_loop;
+    }
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
@@ -595,8 +629,8 @@ pub(crate) unsafe fn llvm_optimize(
             using_thin_buffers,
             config.merge_functions,
             unroll_loops,
-            config.vectorize_slp,
-            config.vectorize_loop,
+            vectorize_slp,
+            vectorize_loop,
             config.no_builtins,
             config.emit_lifetime_markers,
             sanitizer_options.as_ref(),
@@ -640,6 +674,8 @@ pub(crate) unsafe fn optimize(
         unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
     }
 
+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
     if let Some(opt_level) = config.opt_level {
         let opt_stage = match cgcx.lto {
             Lto::Fat => llvm::OptStage::PreLinkFatLTO,
@@ -647,7 +683,20 @@ pub(crate) unsafe fn optimize(
             _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
             _ => llvm::OptStage::PreLinkNoLTO,
         };
-        return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
+
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling
+        let skip_size_increasing_opts = cfg!(llvm_enzyme);
+        return unsafe {
+            llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )
+        };
     }
     Ok(())
 }
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index b5bb7630ca6..5a34b52e6ef 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -2,6 +2,8 @@ use std::borrow::Cow;
 use std::ops::Deref;
 use std::{iter, ptr};
 
+pub(crate) mod autodiff;
+
 use libc::{c_char, c_uint};
 use rustc_abi as abi;
 use rustc_abi::{Align, Size, WrappingRange};
@@ -608,14 +610,6 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn range_metadata(&mut self, load: &'ll Value, range: WrappingRange) {
-        if self.sess().target.arch == "amdgpu" {
-            // amdgpu/LLVM does something weird and thinks an i64 value is
-            // split into a v2i32, halving the bitwidth LLVM expects,
-            // tripping an assertion. So, for now, just disable this
-            // optimization.
-            return;
-        }
-
         if self.cx.sess().opts.optimize == OptLevel::No {
             // Don't emit metadata we're not going to use
             return;
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
new file mode 100644
index 00000000000..38f7eaa090f
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -0,0 +1,344 @@
+use std::ptr;
+
+use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode};
+use rustc_codegen_ssa::ModuleCodegen;
+use rustc_codegen_ssa::back::write::ModuleConfig;
+use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods};
+use rustc_errors::FatalError;
+use rustc_middle::ty::TyCtxt;
+use rustc_session::config::Lto;
+use tracing::{debug, trace};
+
+use crate::back::write::{llvm_err, llvm_optimize};
+use crate::builder::Builder;
+use crate::declare::declare_raw_fn;
+use crate::errors::LlvmError;
+use crate::llvm::AttributePlace::Function;
+use crate::llvm::{Metadata, True};
+use crate::value::Value;
+use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, context, llvm};
+
+fn get_params(fnc: &Value) -> Vec<&Value> {
+    unsafe {
+        let param_num = llvm::LLVMCountParams(fnc) as usize;
+        let mut fnc_args: Vec<&Value> = vec![];
+        fnc_args.reserve(param_num);
+        llvm::LLVMGetParams(fnc, fnc_args.as_mut_ptr());
+        fnc_args.set_len(param_num);
+        fnc_args
+    }
+}
+
+/// When differentiating `fn_to_diff`, take a `outer_fn` and generate another
+/// function with expected naming and calling conventions[^1] which will be
+/// discovered by the enzyme LLVM pass and its body populated with the differentiated
+/// `fn_to_diff`. `outer_fn` is then modified to have a call to the generated
+/// function and handle the differences between the Rust calling convention and
+/// Enzyme.
+/// [^1]: <https://enzyme.mit.edu/getting_started/CallingConvention/>
+// FIXME(ZuseZ4): `outer_fn` should include upstream safety checks to
+// cover some assumptions of enzyme/autodiff, which could lead to UB otherwise.
+fn generate_enzyme_call<'ll, 'tcx>(
+    cx: &context::CodegenCx<'ll, 'tcx>,
+    fn_to_diff: &'ll Value,
+    outer_fn: &'ll Value,
+    attrs: AutoDiffAttrs,
+) {
+    let inputs = attrs.input_activity;
+    let output = attrs.ret_activity;
+
+    // We have to pick the name depending on whether we want forward or reverse mode autodiff.
+    // FIXME(ZuseZ4): The new pass based approach should not need the {Forward/Reverse}First method anymore, since
+    // it will handle higher-order derivatives correctly automatically (in theory). Currently
+    // higher-order derivatives fail, so we should debug that before adjusting this code.
+    let mut ad_name: String = match attrs.mode {
+        DiffMode::Forward => "__enzyme_fwddiff",
+        DiffMode::Reverse => "__enzyme_autodiff",
+        DiffMode::ForwardFirst => "__enzyme_fwddiff",
+        DiffMode::ReverseFirst => "__enzyme_autodiff",
+        _ => panic!("logic bug in autodiff, unrecognized mode"),
+    }
+    .to_string();
+
+    // add outer_fn name to ad_name to make it unique, in case users apply autodiff to multiple
+    // functions. Unwrap will only panic, if LLVM gave us an invalid string.
+    let name = llvm::get_value_name(outer_fn);
+    let outer_fn_name = std::ffi::CStr::from_bytes_with_nul(name).unwrap().to_str().unwrap();
+    ad_name.push_str(outer_fn_name.to_string().as_str());
+
+    // Let us assume the user wrote the following function square:
+    //
+    // ```llvm
+    // define double @square(double %x) {
+    // entry:
+    //  %0 = fmul double %x, %x
+    //  ret double %0
+    // }
+    // ```
+    //
+    // The user now applies autodiff to the function square, in which case fn_to_diff will be `square`.
+    // Our macro generates the following placeholder code (slightly simplified):
+    //
+    // ```llvm
+    // define double @dsquare(double %x) {
+    //  ; placeholder code
+    //  return 0.0;
+    // }
+    // ```
+    //
+    // so our `outer_fn` will be `dsquare`. The unsafe code section below now removes the placeholder
+    // code and inserts an autodiff call. We also add a declaration for the __enzyme_autodiff call.
+    // Again, the arguments to all functions are slightly simplified.
+    // ```llvm
+    // declare double @__enzyme_autodiff_square(...)
+    //
+    // define double @dsquare(double %x) {
+    // entry:
+    //   %0 = tail call double (...) @__enzyme_autodiff_square(double (double)* nonnull @square, double %x)
+    //   ret double %0
+    // }
+    // ```
+    unsafe {
+        // On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input
+        // arguments. We do however need to declare them with their correct return type.
+        // We already figured the correct return type out in our frontend, when generating the outer_fn,
+        // so we can now just go ahead and use that. FIXME(ZuseZ4): This doesn't handle sret yet.
+        let fn_ty = llvm::LLVMGlobalGetValueType(outer_fn);
+        let ret_ty = llvm::LLVMGetReturnType(fn_ty);
+
+        // LLVM can figure out the input types on it's own, so we take a shortcut here.
+        let enzyme_ty = llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True);
+
+        //FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and
+        // think a bit more about what should go here.
+        let cc = llvm::LLVMGetFunctionCallConv(outer_fn);
+        let ad_fn = declare_raw_fn(
+            cx,
+            &ad_name,
+            llvm::CallConv::try_from(cc).expect("invalid callconv"),
+            llvm::UnnamedAddr::No,
+            llvm::Visibility::Default,
+            enzyme_ty,
+        );
+
+        // Otherwise LLVM might inline our temporary code before the enzyme pass has a chance to
+        // do it's work.
+        let attr = llvm::AttributeKind::NoInline.create_attr(cx.llcx);
+        attributes::apply_to_llfn(ad_fn, Function, &[attr]);
+
+        // first, remove all calls from fnc
+        let entry = llvm::LLVMGetFirstBasicBlock(outer_fn);
+        let br = llvm::LLVMRustGetTerminator(entry);
+        llvm::LLVMRustEraseInstFromParent(br);
+
+        let last_inst = llvm::LLVMRustGetLastInstruction(entry).unwrap();
+        let mut builder = Builder::build(cx, entry);
+
+        let num_args = llvm::LLVMCountParams(&fn_to_diff);
+        let mut args = Vec::with_capacity(num_args as usize + 1);
+        args.push(fn_to_diff);
+
+        let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
+        let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
+        let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
+        let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
+        let enzyme_primal_ret = cx.create_metadata("enzyme_primal_return".to_string()).unwrap();
+
+        match output {
+            DiffActivity::Dual => {
+                args.push(cx.get_metadata_value(enzyme_primal_ret));
+            }
+            DiffActivity::Active => {
+                args.push(cx.get_metadata_value(enzyme_primal_ret));
+            }
+            _ => {}
+        }
+
+        trace!("matching autodiff arguments");
+        // We now handle the issue that Rust level arguments not always match the llvm-ir level
+        // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
+        // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
+        // need to match those.
+        // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
+        // using iterators and peek()?
+        let mut outer_pos: usize = 0;
+        let mut activity_pos = 0;
+        let outer_args: Vec<&llvm::Value> = get_params(outer_fn);
+        while activity_pos < inputs.len() {
+            let activity = inputs[activity_pos as usize];
+            // Duplicated arguments received a shadow argument, into which enzyme will write the
+            // gradient.
+            let (activity, duplicated): (&Metadata, bool) = match activity {
+                DiffActivity::None => panic!("not a valid input activity"),
+                DiffActivity::Const => (enzyme_const, false),
+                DiffActivity::Active => (enzyme_out, false),
+                DiffActivity::ActiveOnly => (enzyme_out, false),
+                DiffActivity::Dual => (enzyme_dup, true),
+                DiffActivity::DualOnly => (enzyme_dupnoneed, true),
+                DiffActivity::Duplicated => (enzyme_dup, true),
+                DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
+                DiffActivity::FakeActivitySize => (enzyme_const, false),
+            };
+            let outer_arg = outer_args[outer_pos];
+            args.push(cx.get_metadata_value(activity));
+            args.push(outer_arg);
+            if duplicated {
+                // We know that duplicated args by construction have a following argument,
+                // so this can not be out of bounds.
+                let next_outer_arg = outer_args[outer_pos + 1];
+                let next_outer_ty = cx.val_ty(next_outer_arg);
+                // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since
+                // vectors behind references (&Vec<T>) are already supported. Users can not pass a
+                // Vec by value for reverse mode, so this would only help forward mode autodiff.
+                let slice = {
+                    if activity_pos + 1 >= inputs.len() {
+                        // If there is no arg following our ptr, it also can't be a slice,
+                        // since that would lead to a ptr, int pair.
+                        false
+                    } else {
+                        let next_activity = inputs[activity_pos + 1];
+                        // We analyze the MIR types and add this dummy activity if we visit a slice.
+                        next_activity == DiffActivity::FakeActivitySize
+                    }
+                };
+                if slice {
+                    // A duplicated slice will have the following two outer_fn arguments:
+                    // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call:
+                    // (..., metadata! enzyme_dup, ptr, ptr, int1, ...).
+                    // FIXME(ZuseZ4): We will upstream a safety check later which asserts that
+                    // int2 >= int1, which means the shadow vector is large enough to store the gradient.
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer);
+                    let next_outer_arg2 = outer_args[outer_pos + 2];
+                    let next_outer_ty2 = cx.val_ty(next_outer_arg2);
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer);
+                    let next_outer_arg3 = outer_args[outer_pos + 3];
+                    let next_outer_ty3 = cx.val_ty(next_outer_arg3);
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer);
+                    args.push(next_outer_arg2);
+                    args.push(cx.get_metadata_value(enzyme_const));
+                    args.push(next_outer_arg);
+                    outer_pos += 4;
+                    activity_pos += 2;
+                } else {
+                    // A duplicated pointer will have the following two outer_fn arguments:
+                    // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
+                    // (..., metadata! enzyme_dup, ptr, ptr, ...).
+                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer);
+                    args.push(next_outer_arg);
+                    outer_pos += 2;
+                    activity_pos += 1;
+                }
+            } else {
+                // We do not differentiate with resprect to this argument.
+                // We already added the metadata and argument above, so just increase the counters.
+                outer_pos += 1;
+                activity_pos += 1;
+            }
+        }
+
+        let call = builder.call(enzyme_ty, None, None, ad_fn, &args, None, None);
+
+        // This part is a bit iffy. LLVM requires that a call to an inlineable function has some
+        // metadata attachted to it, but we just created this code oota. Given that the
+        // differentiated function already has partly confusing metadata, and given that this
+        // affects nothing but the auttodiff IR, we take a shortcut and just steal metadata from the
+        // dummy code which we inserted at a higher level.
+        // FIXME(ZuseZ4): Work with Enzyme core devs to clarify what debug metadata issues we have,
+        // and how to best improve it for enzyme core and rust-enzyme.
+        let md_ty = cx.get_md_kind_id("dbg");
+        if llvm::LLVMRustHasMetadata(last_inst, md_ty) {
+            let md = llvm::LLVMRustDIGetInstMetadata(last_inst)
+                .expect("failed to get instruction metadata");
+            let md_todiff = cx.get_metadata_value(md);
+            llvm::LLVMSetMetadata(call, md_ty, md_todiff);
+        } else {
+            // We don't panic, since depending on whether we are in debug or release mode, we might
+            // have no debug info to copy, which would then be ok.
+            trace!("no dbg info");
+        }
+        // Now that we copied the metadata, get rid of dummy code.
+        llvm::LLVMRustEraseInstBefore(entry, last_inst);
+        llvm::LLVMRustEraseInstFromParent(last_inst);
+
+        if cx.val_ty(outer_fn) != cx.type_void() {
+            builder.ret(call);
+        } else {
+            builder.ret_void();
+        }
+
+        // Let's crash in case that we messed something up above and generated invalid IR.
+        llvm::LLVMRustVerifyFunction(
+            outer_fn,
+            llvm::LLVMRustVerifierFailureAction::LLVMAbortProcessAction,
+        );
+    }
+}
+
+pub(crate) fn differentiate<'ll, 'tcx>(
+    module: &'ll ModuleCodegen<ModuleLlvm>,
+    cgcx: &CodegenContext<LlvmCodegenBackend>,
+    tcx: TyCtxt<'tcx>,
+    diff_items: Vec<AutoDiffItem>,
+    config: &ModuleConfig,
+) -> Result<(), FatalError> {
+    for item in &diff_items {
+        trace!("{}", item);
+    }
+
+    let diag_handler = cgcx.create_dcx();
+    let (_, cgus) = tcx.collect_and_partition_mono_items(());
+    let cx = context::CodegenCx::new(tcx, &cgus.first().unwrap(), &module.module_llvm);
+
+    // Before dumping the module, we want all the TypeTrees to become part of the module.
+    for item in diff_items.iter() {
+        let name = item.source.clone();
+        let fn_def: Option<&llvm::Value> = cx.get_function(&name);
+        let Some(fn_def) = fn_def else {
+            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
+                src: item.source.clone(),
+                target: item.target.clone(),
+                error: "could not find source function".to_owned(),
+            }));
+        };
+        debug!(?item.target);
+        let fn_target: Option<&llvm::Value> = cx.get_function(&item.target);
+        let Some(fn_target) = fn_target else {
+            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
+                src: item.source.clone(),
+                target: item.target.clone(),
+                error: "could not find target function".to_owned(),
+            }));
+        };
+
+        generate_enzyme_call(&cx, fn_def, fn_target, item.attrs.clone());
+    }
+
+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
+    if let Some(opt_level) = config.opt_level {
+        let opt_stage = match cgcx.lto {
+            Lto::Fat => llvm::OptStage::PreLinkFatLTO,
+            Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
+            _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
+            _ => llvm::OptStage::PreLinkNoLTO,
+        };
+        // This is our second opt call, so now we run all opts,
+        // to make sure we get the best performance.
+        let skip_size_increasing_opts = false;
+        trace!("running Module Optimization after differentiation");
+        unsafe {
+            llvm_optimize(
+                cgcx,
+                diag_handler.handle(),
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )?
+        };
+    }
+    trace!("done with differentiate()");
+
+    Ok(())
+}
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index c602d99ff9d..d8fbe51b975 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -1,6 +1,6 @@
 use std::borrow::Borrow;
 use std::cell::{Cell, RefCell};
-use std::ffi::{CStr, c_uint};
+use std::ffi::{CStr, c_char, c_uint};
 use std::str;
 
 use rustc_abi::{HasDataLayout, TargetDataLayout, VariantIdx};
@@ -600,6 +600,31 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             llvm::set_section(g, c"llvm.metadata");
         }
     }
+
+    pub(crate) fn get_metadata_value(&self, metadata: &'ll Metadata) -> &'ll Value {
+        unsafe { llvm::LLVMMetadataAsValue(self.llcx, metadata) }
+    }
+
+    pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> {
+        let name = SmallCStr::new(name);
+        unsafe { llvm::LLVMGetNamedFunction(self.llmod, name.as_ptr()) }
+    }
+
+    pub(crate) fn get_md_kind_id(&self, name: &str) -> u32 {
+        unsafe {
+            llvm::LLVMGetMDKindIDInContext(
+                self.llcx,
+                name.as_ptr() as *const c_char,
+                name.len() as c_uint,
+            )
+        }
+    }
+
+    pub(crate) fn create_metadata(&self, name: String) -> Option<&'ll Metadata> {
+        Some(unsafe {
+            llvm::LLVMMDStringInContext2(self.llcx, name.as_ptr() as *const c_char, name.len())
+        })
+    }
 }
 
 impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
index 1f133141c18..b617f4d37f5 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
@@ -1,6 +1,4 @@
-use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId, SourceRegion};
-
-use crate::coverageinfo::mapgen::LocalFileId;
+use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId};
 
 /// Must match the layout of `LLVMRustCounterKind`.
 #[derive(Copy, Clone, Debug)]
@@ -126,30 +124,16 @@ pub(crate) struct CoverageSpan {
     /// Local index into the function's local-to-global file ID table.
     /// The value at that index is itself an index into the coverage filename
     /// table in the CGU's `__llvm_covmap` section.
-    file_id: u32,
+    pub(crate) file_id: u32,
 
     /// 1-based starting line of the source code span.
-    start_line: u32,
+    pub(crate) start_line: u32,
     /// 1-based starting column of the source code span.
-    start_col: u32,
+    pub(crate) start_col: u32,
     /// 1-based ending line of the source code span.
-    end_line: u32,
+    pub(crate) end_line: u32,
     /// 1-based ending column of the source code span. High bit must be unset.
-    end_col: u32,
-}
-
-impl CoverageSpan {
-    pub(crate) fn from_source_region(
-        local_file_id: LocalFileId,
-        code_region: &SourceRegion,
-    ) -> Self {
-        let file_id = local_file_id.as_u32();
-        let &SourceRegion { start_line, start_col, end_line, end_col } = code_region;
-        // Internally, LLVM uses the high bit of `end_col` to distinguish between
-        // code regions and gap regions, so it can't be used by the column number.
-        assert!(end_col & (1u32 << 31) == 0, "high bit of `end_col` must be unset: {end_col:#X}");
-        Self { file_id, start_line, start_col, end_line, end_col }
-    }
+    pub(crate) end_col: u32,
 }
 
 /// Holds tables of the various region types in one struct.
@@ -184,7 +168,7 @@ impl Regions {
 #[derive(Clone, Debug)]
 #[repr(C)]
 pub(crate) struct CodeRegion {
-    pub(crate) span: CoverageSpan,
+    pub(crate) cov_span: CoverageSpan,
     pub(crate) counter: Counter,
 }
 
@@ -192,7 +176,7 @@ pub(crate) struct CodeRegion {
 #[derive(Clone, Debug)]
 #[repr(C)]
 pub(crate) struct BranchRegion {
-    pub(crate) span: CoverageSpan,
+    pub(crate) cov_span: CoverageSpan,
     pub(crate) true_counter: Counter,
     pub(crate) false_counter: Counter,
 }
@@ -201,7 +185,7 @@ pub(crate) struct BranchRegion {
 #[derive(Clone, Debug)]
 #[repr(C)]
 pub(crate) struct MCDCBranchRegion {
-    pub(crate) span: CoverageSpan,
+    pub(crate) cov_span: CoverageSpan,
     pub(crate) true_counter: Counter,
     pub(crate) false_counter: Counter,
     pub(crate) mcdc_branch_params: mcdc::BranchParameters,
@@ -211,6 +195,6 @@ pub(crate) struct MCDCBranchRegion {
 #[derive(Clone, Debug)]
 #[repr(C)]
 pub(crate) struct MCDCDecisionRegion {
-    pub(crate) span: CoverageSpan,
+    pub(crate) cov_span: CoverageSpan,
     pub(crate) mcdc_decision_params: mcdc::DecisionParameters,
 }
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
index 086cf1f44a0..2cd7fa3225a 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
@@ -40,11 +40,10 @@ pub(crate) fn create_pgo_func_name_var<'ll>(
     }
 }
 
-pub(crate) fn write_filenames_to_buffer<'a>(
-    filenames: impl IntoIterator<Item = &'a str>,
-) -> Vec<u8> {
+pub(crate) fn write_filenames_to_buffer(filenames: &[impl AsRef<str>]) -> Vec<u8> {
     let (pointers, lengths) = filenames
         .into_iter()
+        .map(AsRef::as_ref)
         .map(|s: &str| (s.as_c_char_ptr(), s.len()))
         .unzip::<_, _, Vec<_>, Vec<_>>();
 
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index ca334286200..b3ad2a0e409 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -1,11 +1,11 @@
-use std::iter;
+use std::sync::Arc;
 
 use itertools::Itertools;
 use rustc_abi::Align;
 use rustc_codegen_ssa::traits::{
     BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
 };
-use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet};
+use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_index::IndexVec;
 use rustc_middle::mir;
@@ -13,7 +13,7 @@ use rustc_middle::ty::{self, TyCtxt};
 use rustc_session::RemapFileNameExt;
 use rustc_session::config::RemapPathScopeComponents;
 use rustc_span::def_id::DefIdSet;
-use rustc_span::{Span, Symbol};
+use rustc_span::{SourceFile, StableSourceFileId};
 use tracing::debug;
 
 use crate::common::CodegenCx;
@@ -22,6 +22,7 @@ use crate::coverageinfo::mapgen::covfun::prepare_covfun_record;
 use crate::llvm;
 
 mod covfun;
+mod spans;
 
 /// Generates and exports the coverage map, which is embedded in special
 /// linker sections in the final binary.
@@ -131,45 +132,51 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
     generate_covmap_record(cx, covmap_version, &filenames_buffer);
 }
 
-/// Maps "global" (per-CGU) file ID numbers to their underlying filenames.
+/// Maps "global" (per-CGU) file ID numbers to their underlying source files.
 struct GlobalFileTable {
-    /// This "raw" table doesn't include the working dir, so a filename's
+    /// This "raw" table doesn't include the working dir, so a file's
     /// global ID is its index in this set **plus one**.
-    raw_file_table: FxIndexSet<Symbol>,
+    raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>,
 }
 
 impl GlobalFileTable {
     fn new() -> Self {
-        Self { raw_file_table: FxIndexSet::default() }
+        Self { raw_file_table: FxIndexMap::default() }
     }
 
-    fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> GlobalFileId {
+    fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId {
         // Ensure the given file has a table entry, and get its index.
-        let (raw_id, _) = self.raw_file_table.insert_full(file_name);
+        let entry = self.raw_file_table.entry(file.stable_id);
+        let raw_id = entry.index();
+        entry.or_insert_with(|| Arc::clone(file));
+
         // The raw file table doesn't include an entry for the working dir
         // (which has ID 0), so add 1 to get the correct ID.
         GlobalFileId::from_usize(raw_id + 1)
     }
 
     fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
+        let mut table = Vec::with_capacity(self.raw_file_table.len() + 1);
+
         // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
         // requires setting the first filename to the compilation directory.
         // Since rustc generates coverage maps with relative paths, the
         // compilation directory can be combined with the relative paths
         // to get absolute paths, if needed.
-        use rustc_session::RemapFileNameExt;
-        use rustc_session::config::RemapPathScopeComponents;
-        let working_dir: &str = &tcx
-            .sess
-            .opts
-            .working_dir
-            .for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
-            .to_string_lossy();
-
-        // Insert the working dir at index 0, before the other filenames.
-        let filenames =
-            iter::once(working_dir).chain(self.raw_file_table.iter().map(Symbol::as_str));
-        llvm_cov::write_filenames_to_buffer(filenames)
+        table.push(
+            tcx.sess
+                .opts
+                .working_dir
+                .for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
+                .to_string_lossy(),
+        );
+
+        // Add the regular entries after the base directory.
+        table.extend(self.raw_file_table.values().map(|file| {
+            file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
+        }));
+
+        llvm_cov::write_filenames_to_buffer(&table)
     }
 }
 
@@ -182,7 +189,7 @@ rustc_index::newtype_index! {
     /// An index into a function's list of global file IDs. That underlying list
     /// of local-to-global mappings will be embedded in the function's record in
     /// the `__llvm_covfun` linker section.
-    pub(crate) struct LocalFileId {}
+    struct LocalFileId {}
 }
 
 /// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
@@ -208,13 +215,6 @@ impl VirtualFileMapping {
     }
 }
 
-fn span_file_name(tcx: TyCtxt<'_>, span: Span) -> Symbol {
-    let source_file = tcx.sess.source_map().lookup_source_file(span.lo());
-    let name =
-        source_file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy();
-    Symbol::intern(&name)
-}
-
 /// Generates the contents of the covmap record for this CGU, which mostly
 /// consists of a header and a list of filenames. The record is then stored
 /// as a global variable in the `__llvm_covmap` section.
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
index 8e853f057be..5428d776f41 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -10,16 +10,16 @@ use rustc_abi::Align;
 use rustc_codegen_ssa::traits::{
     BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
 };
-use rustc_middle::bug;
 use rustc_middle::mir::coverage::{
     CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping, MappingKind, Op,
 };
 use rustc_middle::ty::{Instance, TyCtxt};
+use rustc_span::Span;
 use rustc_target::spec::HasTargetSpec;
 use tracing::debug;
 
 use crate::common::CodegenCx;
-use crate::coverageinfo::mapgen::{GlobalFileTable, VirtualFileMapping, span_file_name};
+use crate::coverageinfo::mapgen::{GlobalFileTable, VirtualFileMapping, spans};
 use crate::coverageinfo::{ffi, llvm_cov};
 use crate::llvm;
 
@@ -67,12 +67,8 @@ pub(crate) fn prepare_covfun_record<'tcx>(
     fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun);
 
     if covfun.regions.has_no_regions() {
-        if covfun.is_used {
-            bug!("a used function should have had coverage mapping data but did not: {covfun:?}");
-        } else {
-            debug!(?covfun, "unused function had no coverage mapping data");
-            return None;
-        }
+        debug!(?covfun, "function has no mappings to embed; skipping");
+        return None;
     }
 
     Some(covfun)
@@ -121,41 +117,58 @@ fn fill_region_tables<'tcx>(
     covfun: &mut CovfunRecord<'tcx>,
 ) {
     // Currently a function's mappings must all be in the same file as its body span.
-    let file_name = span_file_name(tcx, fn_cov_info.body_span);
+    let source_map = tcx.sess.source_map();
+    let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo());
 
-    // Look up the global file ID for that filename.
-    let global_file_id = global_file_table.global_file_id_for_file_name(file_name);
+    // Look up the global file ID for that file.
+    let global_file_id = global_file_table.global_file_id_for_file(&source_file);
 
     // Associate that global file ID with a local file ID for this function.
     let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
-    debug!("  file id: {local_file_id:?} => {global_file_id:?} = '{file_name:?}'");
 
     let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
         &mut covfun.regions;
 
+    let make_cov_span = |span: Span| {
+        spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span)
+    };
+    let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen();
+
     // For each counter/region pair in this function+file, convert it to a
     // form suitable for FFI.
     let is_zero_term = |term| !covfun.is_used || ids_info.is_zero_term(term);
-    for Mapping { kind, ref source_region } in &fn_cov_info.mappings {
+    for &Mapping { ref kind, span } in &fn_cov_info.mappings {
         // If the mapping refers to counters/expressions that were removed by
         // MIR opts, replace those occurrences with zero.
         let kind = kind.map_terms(|term| if is_zero_term(term) { CovTerm::Zero } else { term });
 
-        let span = ffi::CoverageSpan::from_source_region(local_file_id, source_region);
+        // Convert the `Span` into coordinates that we can pass to LLVM, or
+        // discard the span if conversion fails. In rare, cases _all_ of a
+        // function's spans are discarded, and the rest of coverage codegen
+        // needs to handle that gracefully to avoid a repeat of #133606.
+        // We don't have a good test case for triggering that organically, so
+        // instead we set `-Zcoverage-options=discard-all-spans-in-codegen`
+        // to force it to occur.
+        let Some(cov_span) = make_cov_span(span) else { continue };
+        if discard_all {
+            continue;
+        }
+
         match kind {
             MappingKind::Code(term) => {
-                code_regions.push(ffi::CodeRegion { span, counter: ffi::Counter::from_term(term) });
+                code_regions
+                    .push(ffi::CodeRegion { cov_span, counter: ffi::Counter::from_term(term) });
             }
             MappingKind::Branch { true_term, false_term } => {
                 branch_regions.push(ffi::BranchRegion {
-                    span,
+                    cov_span,
                     true_counter: ffi::Counter::from_term(true_term),
                     false_counter: ffi::Counter::from_term(false_term),
                 });
             }
             MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => {
                 mcdc_branch_regions.push(ffi::MCDCBranchRegion {
-                    span,
+                    cov_span,
                     true_counter: ffi::Counter::from_term(true_term),
                     false_counter: ffi::Counter::from_term(false_term),
                     mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params),
@@ -163,7 +176,7 @@ fn fill_region_tables<'tcx>(
             }
             MappingKind::MCDCDecision(mcdc_decision_params) => {
                 mcdc_decision_regions.push(ffi::MCDCDecisionRegion {
-                    span,
+                    cov_span,
                     mcdc_decision_params: ffi::mcdc::DecisionParameters::from(mcdc_decision_params),
                 });
             }
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
new file mode 100644
index 00000000000..6d1d91340c2
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
@@ -0,0 +1,126 @@
+use rustc_middle::mir::coverage::FunctionCoverageInfo;
+use rustc_span::source_map::SourceMap;
+use rustc_span::{BytePos, Pos, SourceFile, Span};
+use tracing::debug;
+
+use crate::coverageinfo::ffi;
+use crate::coverageinfo::mapgen::LocalFileId;
+
+/// Converts the span into its start line and column, and end line and column.
+///
+/// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
+/// the compiler, these column numbers are denoted in **bytes**, because that's what
+/// LLVM's `llvm-cov` tool expects to see in coverage maps.
+///
+/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
+/// but it's hard to rule out entirely (especially in the presence of complex macros
+/// or other expansions), and if it does happen then skipping a span or function is
+/// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
+pub(crate) fn make_coverage_span(
+    file_id: LocalFileId,
+    source_map: &SourceMap,
+    fn_cov_info: &FunctionCoverageInfo,
+    file: &SourceFile,
+    span: Span,
+) -> Option<ffi::CoverageSpan> {
+    let span = ensure_non_empty_span(source_map, fn_cov_info, span)?;
+
+    let lo = span.lo();
+    let hi = span.hi();
+
+    // Column numbers need to be in bytes, so we can't use the more convenient
+    // `SourceMap` methods for looking up file coordinates.
+    let line_and_byte_column = |pos: BytePos| -> Option<(usize, usize)> {
+        let rpos = file.relative_position(pos);
+        let line_index = file.lookup_line(rpos)?;
+        let line_start = file.lines()[line_index];
+        // Line numbers and column numbers are 1-based, so add 1 to each.
+        Some((line_index + 1, (rpos - line_start).to_usize() + 1))
+    };
+
+    let (mut start_line, start_col) = line_and_byte_column(lo)?;
+    let (mut end_line, end_col) = line_and_byte_column(hi)?;
+
+    // Apply an offset so that code in doctests has correct line numbers.
+    // FIXME(#79417): Currently we have no way to offset doctest _columns_.
+    start_line = source_map.doctest_offset_line(&file.name, start_line);
+    end_line = source_map.doctest_offset_line(&file.name, end_line);
+
+    check_coverage_span(ffi::CoverageSpan {
+        file_id: file_id.as_u32(),
+        start_line: start_line as u32,
+        start_col: start_col as u32,
+        end_line: end_line as u32,
+        end_col: end_col as u32,
+    })
+}
+
+fn ensure_non_empty_span(
+    source_map: &SourceMap,
+    fn_cov_info: &FunctionCoverageInfo,
+    span: Span,
+) -> Option<Span> {
+    if !span.is_empty() {
+        return Some(span);
+    }
+
+    let lo = span.lo();
+    let hi = span.hi();
+
+    // The span is empty, so try to expand it to cover an adjacent '{' or '}',
+    // but only within the bounds of the body span.
+    let try_next = hi < fn_cov_info.body_span.hi();
+    let try_prev = fn_cov_info.body_span.lo() < lo;
+    if !(try_next || try_prev) {
+        return None;
+    }
+
+    source_map
+        .span_to_source(span, |src, start, end| try {
+            // Adjusting span endpoints by `BytePos(1)` is normally a bug,
+            // but in this case we have specifically checked that the character
+            // we're skipping over is one of two specific ASCII characters, so
+            // adjusting by exactly 1 byte is correct.
+            if try_next && src.as_bytes()[end] == b'{' {
+                Some(span.with_hi(hi + BytePos(1)))
+            } else if try_prev && src.as_bytes()[start - 1] == b'}' {
+                Some(span.with_lo(lo - BytePos(1)))
+            } else {
+                None
+            }
+        })
+        .ok()?
+}
+
+/// If `llvm-cov` sees a source region that is improperly ordered (end < start),
+/// it will immediately exit with a fatal error. To prevent that from happening,
+/// discard regions that are improperly ordered, or might be interpreted in a
+/// way that makes them improperly ordered.
+fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> {
+    let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span;
+
+    // Line/column coordinates are supposed to be 1-based. If we ever emit
+    // coordinates of 0, `llvm-cov` might misinterpret them.
+    let all_nonzero = [start_line, start_col, end_line, end_col].into_iter().all(|x| x != 0);
+    // Coverage mappings use the high bit of `end_col` to indicate that a
+    // region is actually a "gap" region, so make sure it's unset.
+    let end_col_has_high_bit_unset = (end_col & (1 << 31)) == 0;
+    // If a region is improperly ordered (end < start), `llvm-cov` will exit
+    // with a fatal error, which is inconvenient for users and hard to debug.
+    let is_ordered = (start_line, start_col) <= (end_line, end_col);
+
+    if all_nonzero && end_col_has_high_bit_unset && is_ordered {
+        Some(cov_span)
+    } else {
+        debug!(
+            ?cov_span,
+            ?all_nonzero,
+            ?end_col_has_high_bit_unset,
+            ?is_ordered,
+            "Skipping source region that would be misinterpreted or rejected by LLVM"
+        );
+        // If this happens in a debug build, ICE to make it easier to notice.
+        debug_assert!(false, "Improper source region: {cov_span:?}");
+        None
+    }
+}
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs b/compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs
new file mode 100644
index 00000000000..40842915222
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs
@@ -0,0 +1,37 @@
+//! Definitions of various DWARF-related constants.
+
+use libc::c_uint;
+
+/// Helper macro to let us redeclare gimli's constants as our own constants
+/// with a different type, with less risk of copy-paste errors.
+macro_rules! declare_constant {
+    (
+        $name:ident : $type:ty
+    ) => {
+        #[allow(non_upper_case_globals)]
+        pub(crate) const $name: $type = ::gimli::constants::$name.0 as $type;
+
+        // Assert that as-cast probably hasn't changed the value.
+        const _: () = assert!($name as i128 == ::gimli::constants::$name.0 as i128);
+    };
+}
+
+declare_constant!(DW_TAG_const_type: c_uint);
+
+// DWARF languages.
+declare_constant!(DW_LANG_Rust: c_uint);
+
+// DWARF attribute type encodings.
+declare_constant!(DW_ATE_boolean: c_uint);
+declare_constant!(DW_ATE_float: c_uint);
+declare_constant!(DW_ATE_signed: c_uint);
+declare_constant!(DW_ATE_unsigned: c_uint);
+declare_constant!(DW_ATE_UTF: c_uint);
+
+// DWARF expression operators.
+declare_constant!(DW_OP_deref: u64);
+declare_constant!(DW_OP_plus_uconst: u64);
+/// Defined by LLVM in `llvm/include/llvm/BinaryFormat/Dwarf.h`.
+/// Double-checked by a static assertion in `RustWrapper.cpp`.
+#[allow(non_upper_case_globals)]
+pub(crate) const DW_OP_LLVM_fragment: u64 = 0x1000;
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index 40248a9009a..88e43e1c678 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -22,6 +22,7 @@ use rustc_target::spec::DebuginfoKind;
 use smallvec::smallvec;
 use tracing::{debug, instrument};
 
+pub(crate) use self::type_map::TypeMap;
 use self::type_map::{DINodeCreationResult, Stub, UniqueTypeId};
 use super::CodegenUnitDebugContext;
 use super::namespace::mangled_name_of_instance;
@@ -30,6 +31,7 @@ use super::utils::{
     DIB, create_DIArray, debug_context, get_namespace_for_item, is_node_local_to_unit,
 };
 use crate::common::{AsCCharPtr, CodegenCx};
+use crate::debuginfo::dwarf_const;
 use crate::debuginfo::metadata::type_map::build_type_with_children;
 use crate::debuginfo::utils::{WidePtrKind, wide_pointer_kind};
 use crate::llvm::debuginfo::{
@@ -59,20 +61,6 @@ impl fmt::Debug for llvm::Metadata {
     }
 }
 
-// From DWARF 5.
-// See http://www.dwarfstd.org/ShowIssue.php?issue=140129.1.
-const DW_LANG_RUST: c_uint = 0x1c;
-#[allow(non_upper_case_globals)]
-const DW_ATE_boolean: c_uint = 0x02;
-#[allow(non_upper_case_globals)]
-const DW_ATE_float: c_uint = 0x04;
-#[allow(non_upper_case_globals)]
-const DW_ATE_signed: c_uint = 0x05;
-#[allow(non_upper_case_globals)]
-const DW_ATE_unsigned: c_uint = 0x07;
-#[allow(non_upper_case_globals)]
-const DW_ATE_UTF: c_uint = 0x10;
-
 pub(super) const UNKNOWN_LINE_NUMBER: c_uint = 0;
 pub(super) const UNKNOWN_COLUMN_NUMBER: c_uint = 0;
 
@@ -87,8 +75,6 @@ type SmallVec<T> = smallvec::SmallVec<[T; 16]>;
 mod enums;
 mod type_map;
 
-pub(crate) use type_map::TypeMap;
-
 /// Returns from the enclosing function if the type debuginfo node with the given
 /// unique ID can be found in the type map.
 macro_rules! return_if_di_node_created_in_meantime {
@@ -519,7 +505,7 @@ fn recursion_marker_type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> &'ll D
                 name.as_c_char_ptr(),
                 name.len(),
                 cx.tcx.data_layout.pointer_size.bits(),
-                DW_ATE_unsigned,
+                dwarf_const::DW_ATE_unsigned,
             )
         }
     })
@@ -778,6 +764,8 @@ fn build_basic_type_di_node<'ll, 'tcx>(
     // .natvis visualizers (and perhaps other existing native debuggers?)
     let cpp_like_debuginfo = cpp_like_debuginfo(cx.tcx);
 
+    use dwarf_const::{DW_ATE_UTF, DW_ATE_boolean, DW_ATE_float, DW_ATE_signed, DW_ATE_unsigned};
+
     let (name, encoding) = match t.kind() {
         ty::Never => ("!", DW_ATE_unsigned),
         ty::Tuple(elements) if elements.is_empty() => {
@@ -958,7 +946,7 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
 
         let unit_metadata = llvm::LLVMRustDIBuilderCreateCompileUnit(
             debug_context.builder,
-            DW_LANG_RUST,
+            dwarf_const::DW_LANG_Rust,
             compile_unit_file,
             producer.as_c_char_ptr(),
             producer.len(),
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
index d374767f187..a72e205c9b2 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
@@ -12,6 +12,7 @@ use rustc_middle::ty::{self, AdtDef, CoroutineArgs, CoroutineArgsExt, Ty};
 use smallvec::smallvec;
 
 use crate::common::{AsCCharPtr, CodegenCx};
+use crate::debuginfo::dwarf_const::DW_TAG_const_type;
 use crate::debuginfo::metadata::enums::DiscrResult;
 use crate::debuginfo::metadata::type_map::{self, Stub, UniqueTypeId};
 use crate::debuginfo::metadata::{
@@ -212,21 +213,17 @@ pub(super) fn build_enum_type_di_node<'ll, 'tcx>(
         ),
         |cx, enum_type_di_node| {
             match enum_type_and_layout.variants {
-                Variants::Single { index: variant_index } => {
-                    if enum_adt_def.variants().is_empty() {
-                        // Uninhabited enums have Variants::Single. We don't generate
-                        // any members for them.
-                        return smallvec![];
-                    }
-
-                    build_single_variant_union_fields(
-                        cx,
-                        enum_adt_def,
-                        enum_type_and_layout,
-                        enum_type_di_node,
-                        variant_index,
-                    )
+                Variants::Empty => {
+                    // We don't generate any members for uninhabited types.
+                    return smallvec![];
                 }
+                Variants::Single { index: variant_index } => build_single_variant_union_fields(
+                    cx,
+                    enum_adt_def,
+                    enum_type_and_layout,
+                    enum_type_di_node,
+                    variant_index,
+                ),
                 Variants::Multiple {
                     tag_encoding: TagEncoding::Direct,
                     ref variants,
@@ -303,6 +300,7 @@ pub(super) fn build_coroutine_di_node<'ll, 'tcx>(
                 )
             }
             Variants::Single { .. }
+            | Variants::Empty
             | Variants::Multiple { tag_encoding: TagEncoding::Niche { .. }, .. } => {
                 bug!(
                     "Encountered coroutine with non-direct-tag layout: {:?}",
@@ -569,22 +567,39 @@ fn build_variant_struct_wrapper_type_di_node<'ll, 'tcx>(
                 None,
             ));
 
-            let build_assoc_const =
-                |name: &str, type_di_node: &'ll DIType, value: u64, align: Align| unsafe {
-                    llvm::LLVMRustDIBuilderCreateStaticMemberType(
-                        DIB(cx),
-                        wrapper_struct_type_di_node,
-                        name.as_c_char_ptr(),
-                        name.len(),
-                        unknown_file_metadata(cx),
-                        UNKNOWN_LINE_NUMBER,
-                        type_di_node,
-                        DIFlags::FlagZero,
-                        Some(cx.const_u64(value)),
-                        align.bits() as u32,
-                    )
+            let build_assoc_const = |name: &str,
+                                     type_di_node_: &'ll DIType,
+                                     value: u64,
+                                     align: Align| unsafe {
+                // FIXME: Currently we force all DISCR_* values to be u64's as LLDB seems to have
+                // problems inspecting other value types. Since DISCR_* is typically only going to be
+                // directly inspected via the debugger visualizer - which compares it to the `tag` value
+                // (whose type is not modified at all) it shouldn't cause any real problems.
+                let (t_di, align) = if name == ASSOC_CONST_DISCR_NAME {
+                    (type_di_node_, align.bits() as u32)
+                } else {
+                    let ty_u64 = Ty::new_uint(cx.tcx, ty::UintTy::U64);
+                    (type_di_node(cx, ty_u64), Align::EIGHT.bits() as u32)
                 };
 
+                // must wrap type in a `const` modifier for LLDB to be able to inspect the value of the member
+                let field_type =
+                    llvm::LLVMRustDIBuilderCreateQualifiedType(DIB(cx), DW_TAG_const_type, t_di);
+
+                llvm::LLVMRustDIBuilderCreateStaticMemberType(
+                    DIB(cx),
+                    wrapper_struct_type_di_node,
+                    name.as_c_char_ptr(),
+                    name.len(),
+                    unknown_file_metadata(cx),
+                    UNKNOWN_LINE_NUMBER,
+                    field_type,
+                    DIFlags::FlagZero,
+                    Some(cx.const_u64(value)),
+                    align,
+                )
+            };
+
             // We also always have an associated constant for the discriminant value
             // of the variant.
             fields.push(build_assoc_const(
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
index 65ab22ad89e..9f6a5cc89e0 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
@@ -392,7 +392,7 @@ fn compute_discriminant_value<'ll, 'tcx>(
     variant_index: VariantIdx,
 ) -> DiscrResult {
     match enum_type_and_layout.layout.variants() {
-        &Variants::Single { .. } => DiscrResult::NoDiscriminant,
+        &Variants::Single { .. } | &Variants::Empty => DiscrResult::NoDiscriminant,
         &Variants::Multiple { tag_encoding: TagEncoding::Direct, .. } => DiscrResult::Value(
             enum_type_and_layout.ty.discriminant_for_variant(cx.tcx, variant_index).unwrap().val,
         ),
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
index 241bf167a81..11824398f24 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
@@ -358,8 +358,8 @@ fn build_discr_member_di_node<'ll, 'tcx>(
     let containing_scope = enum_or_coroutine_type_di_node;
 
     match enum_or_coroutine_type_and_layout.layout.variants() {
-        // A single-variant enum has no discriminant.
-        &Variants::Single { .. } => None,
+        // A single-variant or no-variant enum has no discriminant.
+        &Variants::Single { .. } | &Variants::Empty => None,
 
         &Variants::Multiple { tag_field, .. } => {
             let tag_base_type = tag_base_type(cx.tcx, enum_or_coroutine_type_and_layout);
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index fae698bea2a..755f4816acf 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -39,6 +39,7 @@ use crate::llvm::debuginfo::{
 use crate::value::Value;
 
 mod create_scope_map;
+mod dwarf_const;
 mod gdb;
 pub(crate) mod metadata;
 mod namespace;
@@ -47,6 +48,10 @@ mod utils;
 use self::create_scope_map::compute_mir_scopes;
 pub(crate) use self::metadata::build_global_var_di_node;
 
+// FIXME(Zalathar): These `DW_TAG_*` constants are fake values that were
+// removed from LLVM in 2015, and are only used by our own `RustWrapper.cpp`
+// to decide which C++ API to call. Instead, we should just have two separate
+// FFI functions and choose the correct one on the Rust side.
 #[allow(non_upper_case_globals)]
 const DW_TAG_auto_variable: c_uint = 0x100;
 #[allow(non_upper_case_globals)]
@@ -152,29 +157,26 @@ impl<'ll> DebugInfoBuilderMethods for Builder<'_, 'll, '_> {
         indirect_offsets: &[Size],
         fragment: Option<Range<Size>>,
     ) {
+        use dwarf_const::{DW_OP_LLVM_fragment, DW_OP_deref, DW_OP_plus_uconst};
+
         // Convert the direct and indirect offsets and fragment byte range to address ops.
-        // FIXME(eddyb) use `const`s instead of getting the values via FFI,
-        // the values should match the ones in the DWARF standard anyway.
-        let op_deref = || unsafe { llvm::LLVMRustDIBuilderCreateOpDeref() };
-        let op_plus_uconst = || unsafe { llvm::LLVMRustDIBuilderCreateOpPlusUconst() };
-        let op_llvm_fragment = || unsafe { llvm::LLVMRustDIBuilderCreateOpLLVMFragment() };
         let mut addr_ops = SmallVec::<[u64; 8]>::new();
 
         if direct_offset.bytes() > 0 {
-            addr_ops.push(op_plus_uconst());
+            addr_ops.push(DW_OP_plus_uconst);
             addr_ops.push(direct_offset.bytes() as u64);
         }
         for &offset in indirect_offsets {
-            addr_ops.push(op_deref());
+            addr_ops.push(DW_OP_deref);
             if offset.bytes() > 0 {
-                addr_ops.push(op_plus_uconst());
+                addr_ops.push(DW_OP_plus_uconst);
                 addr_ops.push(offset.bytes() as u64);
             }
         }
         if let Some(fragment) = fragment {
             // `DW_OP_LLVM_fragment` takes as arguments the fragment's
             // offset and size, both of them in bits.
-            addr_ops.push(op_llvm_fragment());
+            addr_ops.push(DW_OP_LLVM_fragment);
             addr_ops.push(fragment.start.bits() as u64);
             addr_ops.push((fragment.end - fragment.start).bits() as u64);
         }
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index d338c848754..3ec386f6b07 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -32,7 +32,7 @@ use crate::{attributes, llvm};
 ///
 /// If there’s a value with the same name already declared, the function will
 /// update the declaration and return existing Value instead.
-fn declare_raw_fn<'ll>(
+pub(crate) fn declare_raw_fn<'ll>(
     cx: &CodegenCx<'ll, '_>,
     name: &str,
     callconv: llvm::CallConv,
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index 3cdb5b971d9..f4c9491f758 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -37,6 +37,7 @@ pub(crate) struct UnstableCTargetFeature<'a> {
 #[note(codegen_llvm_forbidden_ctarget_feature_issue)]
 pub(crate) struct ForbiddenCTargetFeature<'a> {
     pub feature: &'a str,
+    pub enabled: &'a str,
     pub reason: &'a str,
 }
 
@@ -90,6 +91,11 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> {
 }
 
 #[derive(Diagnostic)]
+#[diag(codegen_llvm_autodiff_without_lto)]
+#[note]
+pub(crate) struct AutoDiffWithoutLTO;
+
+#[derive(Diagnostic)]
 #[diag(codegen_llvm_lto_disallowed)]
 pub(crate) struct LtoDisallowed;
 
@@ -131,6 +137,8 @@ pub enum LlvmError<'a> {
     PrepareThinLtoModule,
     #[diag(codegen_llvm_parse_bitcode)]
     ParseBitcode,
+    #[diag(codegen_llvm_prepare_autodiff)]
+    PrepareAutoDiff { src: String, target: String, error: String },
 }
 
 pub(crate) struct WithLlvmError<'a>(pub LlvmError<'a>, pub String);
@@ -152,6 +160,7 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for WithLlvmError<'_> {
             }
             PrepareThinLtoModule => fluent::codegen_llvm_prepare_thin_lto_module_with_llvm_err,
             ParseBitcode => fluent::codegen_llvm_parse_bitcode_with_llvm_err,
+            PrepareAutoDiff { .. } => fluent::codegen_llvm_prepare_autodiff_with_llvm_err,
         };
         self.0
             .into_diag(dcx, level)
@@ -206,12 +215,6 @@ pub(crate) struct MismatchedDataLayout<'a> {
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_llvm_invalid_target_feature_prefix)]
-pub(crate) struct InvalidTargetFeaturePrefix<'a> {
-    pub feature: &'a str,
-}
-
-#[derive(Diagnostic)]
 #[diag(codegen_llvm_fixed_x18_invalid_arch)]
 pub(crate) struct FixedX18InvalidArch<'a> {
     pub arch: &'a str,
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index c38c5d4c644..cabcfc9b42b 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -340,6 +340,37 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     self.const_i32(cache_type),
                 ])
             }
+            sym::carrying_mul_add => {
+                let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx);
+
+                let wide_llty = self.type_ix(size.bits() * 2);
+                let args = args.as_array().unwrap();
+                let [a, b, c, d] = args.map(|a| self.intcast(a.immediate(), wide_llty, signed));
+
+                let wide = if signed {
+                    let prod = self.unchecked_smul(a, b);
+                    let acc = self.unchecked_sadd(prod, c);
+                    self.unchecked_sadd(acc, d)
+                } else {
+                    let prod = self.unchecked_umul(a, b);
+                    let acc = self.unchecked_uadd(prod, c);
+                    self.unchecked_uadd(acc, d)
+                };
+
+                let narrow_llty = self.type_ix(size.bits());
+                let low = self.trunc(wide, narrow_llty);
+                let bits_const = self.const_uint(wide_llty, size.bits());
+                // No need for ashr when signed; LLVM changes it to lshr anyway.
+                let high = self.lshr(wide, bits_const);
+                // FIXME: could be `trunc nuw`, even for signed.
+                let high = self.trunc(high, narrow_llty);
+
+                let pair_llty = self.type_struct(&[narrow_llty, narrow_llty], false);
+                let pair = self.const_poison(pair_llty);
+                let pair = self.insert_value(pair, low, 0);
+                let pair = self.insert_value(pair, high, 1);
+                pair
+            }
             sym::ctlz
             | sym::ctlz_nonzero
             | sym::cttz
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index b079eb8fe0c..06afe8bb3ad 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -17,6 +17,8 @@
 #![feature(iter_intersperse)]
 #![feature(let_chains)]
 #![feature(rustdoc_internals)]
+#![feature(slice_as_array)]
+#![feature(try_blocks)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end
 
@@ -26,9 +28,10 @@ use std::mem::ManuallyDrop;
 
 use back::owned_target_machine::OwnedTargetMachine;
 use back::write::{create_informational_target_machine, create_target_machine};
-use errors::ParseTargetMachineConfig;
+use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig};
 pub use llvm_util::target_features_cfg;
 use rustc_ast::expand::allocator::AllocatorKind;
+use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
 use rustc_codegen_ssa::back::write::{
     CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn,
@@ -42,7 +45,7 @@ use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
 use rustc_middle::ty::TyCtxt;
 use rustc_middle::util::Providers;
 use rustc_session::Session;
-use rustc_session::config::{OptLevel, OutputFilenames, PrintKind, PrintRequest};
+use rustc_session::config::{Lto, OptLevel, OutputFilenames, PrintKind, PrintRequest};
 use rustc_span::Symbol;
 
 mod back {
@@ -231,6 +234,20 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
         (module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod()))
     }
+    /// Generate autodiff rules
+    fn autodiff(
+        cgcx: &CodegenContext<Self>,
+        tcx: TyCtxt<'_>,
+        module: &ModuleCodegen<Self::Module>,
+        diff_fncs: Vec<AutoDiffItem>,
+        config: &ModuleConfig,
+    ) -> Result<(), FatalError> {
+        if cgcx.lto != Lto::Fat {
+            let dcx = cgcx.create_dcx();
+            return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutLTO));
+        }
+        builder::autodiff::differentiate(module, cgcx, tcx, diff_fncs, config)
+    }
 }
 
 unsafe impl Send for LlvmCodegenBackend {} // Llvm is on a per-thread basis
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
new file mode 100644
index 00000000000..729d6f62e24
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -0,0 +1,29 @@
+#![allow(non_camel_case_types)]
+
+use libc::{c_char, c_uint};
+
+use super::ffi::{BasicBlock, Metadata, Module, Type, Value};
+use crate::llvm::Bool;
+extern "C" {
+    // Enzyme
+    pub fn LLVMRustHasMetadata(I: &Value, KindID: c_uint) -> bool;
+    pub fn LLVMRustEraseInstBefore(BB: &BasicBlock, I: &Value);
+    pub fn LLVMRustGetLastInstruction<'a>(BB: &BasicBlock) -> Option<&'a Value>;
+    pub fn LLVMRustDIGetInstMetadata(I: &Value) -> Option<&Metadata>;
+    pub fn LLVMRustEraseInstFromParent(V: &Value);
+    pub fn LLVMRustGetTerminator<'a>(B: &BasicBlock) -> &'a Value;
+    pub fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
+
+    pub fn LLVMGetFunctionCallConv(F: &Value) -> c_uint;
+    pub fn LLVMGetReturnType(T: &Type) -> &Type;
+    pub fn LLVMGetParams(Fnc: &Value, parms: *mut &Value);
+    pub fn LLVMGetNamedFunction(M: &Module, Name: *const c_char) -> Option<&Value>;
+}
+
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq)]
+pub enum LLVMRustVerifierFailureAction {
+    LLVMAbortProcessAction = 0,
+    LLVMPrintMessageAction = 1,
+    LLVMReturnStatusAction = 2,
+}
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index d62632d1431..cb4a8c9a5f2 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -56,25 +56,6 @@ pub enum LLVMRustResult {
     Failure,
 }
 
-// Rust version of the C struct with the same name in rustc_llvm/llvm-wrapper/RustWrapper.cpp.
-#[repr(C)]
-pub struct LLVMRustCOFFShortExport {
-    pub name: *const c_char,
-    pub ordinal_present: bool,
-    /// value of `ordinal` only important when `ordinal_present` is true
-    pub ordinal: u16,
-}
-
-impl LLVMRustCOFFShortExport {
-    pub fn new(name: *const c_char, ordinal: Option<u16>) -> LLVMRustCOFFShortExport {
-        LLVMRustCOFFShortExport {
-            name,
-            ordinal_present: ordinal.is_some(),
-            ordinal: ordinal.unwrap_or(0),
-        }
-    }
-}
-
 /// Translation of LLVM's MachineTypes enum, defined in llvm\include\llvm\BinaryFormat\COFF.h.
 ///
 /// We include only architectures supported on Windows.
@@ -118,7 +99,7 @@ pub enum ModuleFlagMergeBehavior {
 /// LLVM CallingConv::ID. Should we wrap this?
 ///
 /// See <https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/IR/CallingConv.h>
-#[derive(Copy, Clone, PartialEq, Debug)]
+#[derive(Copy, Clone, PartialEq, Debug, TryFromU32)]
 #[repr(C)]
 pub enum CallConv {
     CCallConv = 0,
@@ -545,7 +526,7 @@ pub struct SanitizerOptions {
     pub sanitize_kernel_address_recover: bool,
 }
 
-/// LLVMRelocMode
+/// LLVMRustRelocModel
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
 pub enum RelocModel {
@@ -557,6 +538,15 @@ pub enum RelocModel {
     ROPI_RWPI,
 }
 
+/// LLVMRustFloatABI
+#[derive(Copy, Clone, PartialEq)]
+#[repr(C)]
+pub enum FloatAbi {
+    Default,
+    Soft,
+    Hard,
+}
+
 /// LLVMRustCodeModel
 #[derive(Copy, Clone)]
 #[repr(C)]
@@ -2018,6 +2008,12 @@ unsafe extern "C" {
         AlignInBits: u32,
     ) -> &'a DIDerivedType;
 
+    pub fn LLVMRustDIBuilderCreateQualifiedType<'a>(
+        Builder: &DIBuilder<'a>,
+        Tag: c_uint,
+        Type: &'a DIType,
+    ) -> &'a DIDerivedType;
+
     pub fn LLVMRustDIBuilderCreateLexicalBlock<'a>(
         Builder: &DIBuilder<'a>,
         Scope: &'a DIScope,
@@ -2181,9 +2177,6 @@ unsafe extern "C" {
         Location: &'a DILocation,
         BD: c_uint,
     ) -> Option<&'a DILocation>;
-    pub fn LLVMRustDIBuilderCreateOpDeref() -> u64;
-    pub fn LLVMRustDIBuilderCreateOpPlusUconst() -> u64;
-    pub fn LLVMRustDIBuilderCreateOpLLVMFragment() -> u64;
 
     pub fn LLVMRustWriteTypeToString(Type: &Type, s: &RustString);
     pub fn LLVMRustWriteValueToString(value_ref: &Value, s: &RustString);
@@ -2211,7 +2204,7 @@ unsafe extern "C" {
         Model: CodeModel,
         Reloc: RelocModel,
         Level: CodeGenOptLevel,
-        UseSoftFP: bool,
+        FloatABIType: FloatAbi,
         FunctionSections: bool,
         DataSections: bool,
         UniqueSectionNames: bool,
@@ -2347,15 +2340,6 @@ unsafe extern "C" {
     ) -> &'a mut RustArchiveMember<'a>;
     pub fn LLVMRustArchiveMemberFree<'a>(Member: &'a mut RustArchiveMember<'a>);
 
-    pub fn LLVMRustWriteImportLibrary(
-        ImportName: *const c_char,
-        Path: *const c_char,
-        Exports: *const LLVMRustCOFFShortExport,
-        NumExports: usize,
-        Machine: u16,
-        MinGW: bool,
-    ) -> LLVMRustResult;
-
     pub fn LLVMRustSetDataLayoutFromTargetMachine<'a>(M: &'a Module, TM: &'a TargetMachine);
 
     pub fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock);
@@ -2390,7 +2374,7 @@ unsafe extern "C" {
         Data: &ThinLTOData,
         Module: &Module,
         Target: &TargetMachine,
-    ) -> bool;
+    );
     pub fn LLVMRustPrepareThinLTOResolveWeak(Data: &ThinLTOData, Module: &Module) -> bool;
     pub fn LLVMRustPrepareThinLTOInternalize(Data: &ThinLTOData, Module: &Module) -> bool;
     pub fn LLVMRustPrepareThinLTOImport(
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index 909afe35a17..2592a7df95c 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -22,8 +22,11 @@ use crate::common::AsCCharPtr;
 
 pub mod archive_ro;
 pub mod diagnostic;
+pub mod enzyme_ffi;
 mod ffi;
 
+pub use self::enzyme_ffi::*;
+
 impl LLVMRustResult {
     pub fn into_result(self) -> Result<(), ()> {
         match self {
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 628c0b1c29c..c3d7c217861 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -21,8 +21,8 @@ use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATU
 
 use crate::back::write::create_informational_target_machine;
 use crate::errors::{
-    FixedX18InvalidArch, ForbiddenCTargetFeature, InvalidTargetFeaturePrefix, PossibleFeature,
-    UnknownCTargetFeature, UnknownCTargetFeaturePrefix, UnstableCTargetFeature,
+    FixedX18InvalidArch, ForbiddenCTargetFeature, PossibleFeature, UnknownCTargetFeature,
+    UnknownCTargetFeaturePrefix, UnstableCTargetFeature,
 };
 use crate::llvm;
 
@@ -109,7 +109,10 @@ unsafe fn configure_llvm(sess: &Session) {
             add("-wasm-enable-eh", false);
         }
 
-        if sess.target.os == "emscripten" && sess.panic_strategy() == PanicStrategy::Unwind {
+        if sess.target.os == "emscripten"
+            && !sess.opts.unstable_opts.emscripten_wasm_eh
+            && sess.panic_strategy() == PanicStrategy::Unwind
+        {
             add("-enable-emscripten-cxx-exceptions", false);
         }
 
@@ -348,7 +351,16 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
     {
         if enabled {
             // Also add all transitively implied features.
-            features.extend(sess.target.implied_target_features(std::iter::once(feature)));
+
+            // We don't care about the order in `features` since the only thing we use it for is the
+            // `features.contains` below.
+            #[allow(rustc::potential_query_instability)]
+            features.extend(
+                sess.target
+                    .implied_target_features(std::iter::once(feature.as_str()))
+                    .iter()
+                    .map(|s| Symbol::intern(s)),
+            );
         } else {
             // Remove transitively reverse-implied features.
 
@@ -356,7 +368,11 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
             // `features.contains` below.
             #[allow(rustc::potential_query_instability)]
             features.retain(|f| {
-                if sess.target.implied_target_features(std::iter::once(*f)).contains(&feature) {
+                if sess
+                    .target
+                    .implied_target_features(std::iter::once(f.as_str()))
+                    .contains(&feature.as_str())
+                {
                     // If `f` if implies `feature`, then `!feature` implies `!f`, so we have to
                     // remove `f`. (This is the standard logical contraposition principle.)
                     false
@@ -638,7 +654,7 @@ pub(crate) fn global_llvm_features(
         sess.target
             .features
             .split(',')
-            .filter(|v| !v.is_empty() && backend_feature_name(sess, v).is_some())
+            .filter(|v| !v.is_empty())
             // Drop +v8plus feature introduced in LLVM 20.
             .filter(|v| *v != "+v8plus" || get_version() >= (20, 0, 0))
             .map(String::from),
@@ -651,89 +667,136 @@ pub(crate) fn global_llvm_features(
     // -Ctarget-features
     if !only_base_features {
         let known_features = sess.target.rust_target_features();
+        // Will only be filled when `diagnostics` is set!
         let mut featsmap = FxHashMap::default();
 
-        // insert implied features
+        // Ensure that all ABI-required features are enabled, and the ABI-forbidden ones
+        // are disabled.
+        let abi_feature_constraints = sess.target.abi_required_features();
+        let abi_incompatible_set =
+            FxHashSet::from_iter(abi_feature_constraints.incompatible.iter().copied());
+
+        // Compute implied features
         let mut all_rust_features = vec![];
         for feature in sess.opts.cg.target_feature.split(',') {
-            match feature.strip_prefix('+') {
-                Some(feature) => all_rust_features.extend(
-                    UnordSet::from(
-                        sess.target
-                            .implied_target_features(std::iter::once(Symbol::intern(feature))),
-                    )
-                    .to_sorted_stable_ord()
-                    .iter()
-                    .map(|s| format!("+{}", s.as_str())),
-                ),
-                _ => all_rust_features.push(feature.to_string()),
+            if let Some(feature) = feature.strip_prefix('+') {
+                all_rust_features.extend(
+                    UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
+                        .to_sorted_stable_ord()
+                        .iter()
+                        .map(|&&s| (true, s)),
+                )
+            } else if let Some(feature) = feature.strip_prefix('-') {
+                // FIXME: Why do we not remove implied features on "-" here?
+                // We do the equivalent above in `target_features_cfg`.
+                // See <https://github.com/rust-lang/rust/issues/134792>.
+                all_rust_features.push((false, feature));
+            } else if !feature.is_empty() {
+                if diagnostics {
+                    sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature });
+                }
             }
         }
+        // Remove features that are meant for rustc, not LLVM.
+        all_rust_features.retain(|(_, feature)| {
+            // Retain if it is not a rustc feature
+            !RUSTC_SPECIFIC_FEATURES.contains(feature)
+        });
 
-        let feats = all_rust_features
-            .iter()
-            .filter_map(|s| {
-                let enable_disable = match s.chars().next() {
-                    None => return None,
-                    Some(c @ ('+' | '-')) => c,
-                    Some(_) => {
-                        if diagnostics {
-                            sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature: s });
-                        }
-                        return None;
-                    }
-                };
-
-                // Get the backend feature name, if any.
-                // This excludes rustc-specific features, which do not get passed to LLVM.
-                let feature = backend_feature_name(sess, s)?;
-                // Warn against use of LLVM specific feature names and unstable features on the CLI.
-                if diagnostics {
-                    let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature);
-                    match feature_state {
-                        None => {
-                            let rust_feature =
-                                known_features.iter().find_map(|&(rust_feature, _, _)| {
-                                    let llvm_features = to_llvm_features(sess, rust_feature)?;
-                                    if llvm_features.contains(feature)
-                                        && !llvm_features.contains(rust_feature)
-                                    {
-                                        Some(rust_feature)
-                                    } else {
-                                        None
-                                    }
-                                });
-                            let unknown_feature = if let Some(rust_feature) = rust_feature {
-                                UnknownCTargetFeature {
-                                    feature,
-                                    rust_feature: PossibleFeature::Some { rust_feature },
-                                }
-                            } else {
-                                UnknownCTargetFeature {
-                                    feature,
-                                    rust_feature: PossibleFeature::None,
+        // Check feature validity.
+        if diagnostics {
+            for &(enable, feature) in &all_rust_features {
+                let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature);
+                match feature_state {
+                    None => {
+                        let rust_feature =
+                            known_features.iter().find_map(|&(rust_feature, _, _)| {
+                                let llvm_features = to_llvm_features(sess, rust_feature)?;
+                                if llvm_features.contains(feature)
+                                    && !llvm_features.contains(rust_feature)
+                                {
+                                    Some(rust_feature)
+                                } else {
+                                    None
                                 }
-                            };
-                            sess.dcx().emit_warn(unknown_feature);
-                        }
-                        Some((_, stability, _)) => {
-                            if let Err(reason) =
-                                stability.toggle_allowed(&sess.target, enable_disable == '+')
-                            {
-                                sess.dcx().emit_warn(ForbiddenCTargetFeature { feature, reason });
-                            } else if stability.requires_nightly().is_some() {
-                                // An unstable feature. Warn about using it. It makes little sense
-                                // to hard-error here since we just warn about fully unknown
-                                // features above.
-                                sess.dcx().emit_warn(UnstableCTargetFeature { feature });
+                            });
+                        let unknown_feature = if let Some(rust_feature) = rust_feature {
+                            UnknownCTargetFeature {
+                                feature,
+                                rust_feature: PossibleFeature::Some { rust_feature },
                             }
+                        } else {
+                            UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None }
+                        };
+                        sess.dcx().emit_warn(unknown_feature);
+                    }
+                    Some((_, stability, _)) => {
+                        if let Err(reason) = stability.toggle_allowed() {
+                            sess.dcx().emit_warn(ForbiddenCTargetFeature {
+                                feature,
+                                enabled: if enable { "enabled" } else { "disabled" },
+                                reason,
+                            });
+                        } else if stability.requires_nightly().is_some() {
+                            // An unstable feature. Warn about using it. It makes little sense
+                            // to hard-error here since we just warn about fully unknown
+                            // features above.
+                            sess.dcx().emit_warn(UnstableCTargetFeature { feature });
                         }
                     }
+                }
 
-                    // FIXME(nagisa): figure out how to not allocate a full hashset here.
-                    featsmap.insert(feature, enable_disable == '+');
+                // Ensure that the features we enable/disable are compatible with the ABI.
+                if enable {
+                    if abi_incompatible_set.contains(feature) {
+                        sess.dcx().emit_warn(ForbiddenCTargetFeature {
+                            feature,
+                            enabled: "enabled",
+                            reason: "this feature is incompatible with the target ABI",
+                        });
+                    }
+                } else {
+                    // FIXME: we have to request implied features here since
+                    // negative features do not handle implied features above.
+                    for &required in abi_feature_constraints.required.iter() {
+                        let implied =
+                            sess.target.implied_target_features(std::iter::once(required));
+                        if implied.contains(feature) {
+                            sess.dcx().emit_warn(ForbiddenCTargetFeature {
+                                feature,
+                                enabled: "disabled",
+                                reason: "this feature is required by the target ABI",
+                            });
+                        }
+                    }
                 }
 
+                // FIXME(nagisa): figure out how to not allocate a full hashset here.
+                featsmap.insert(feature, enable);
+            }
+        }
+
+        // To be sure the ABI-relevant features are all in the right state, we explicitly
+        // (un)set them here. This means if the target spec sets those features wrong,
+        // we will silently correct them rather than silently producing wrong code.
+        // (The target sanity check tries to catch this, but we can't know which features are
+        // enabled in LLVM by default so we can't be fully sure about that check.)
+        // We add these at the beginning of the list so that `-Ctarget-features` can
+        // still override it... that's unsound, but more compatible with past behavior.
+        all_rust_features.splice(
+            0..0,
+            abi_feature_constraints
+                .required
+                .iter()
+                .map(|&f| (true, f))
+                .chain(abi_feature_constraints.incompatible.iter().map(|&f| (false, f))),
+        );
+
+        // Translate this into LLVM features.
+        let feats = all_rust_features
+            .iter()
+            .filter_map(|&(enable, feature)| {
+                let enable_disable = if enable { '+' } else { '-' };
                 // We run through `to_llvm_features` when
                 // passing requests down to LLVM. This means that all in-language
                 // features also work on the command line instead of having two
@@ -746,9 +809,9 @@ pub(crate) fn global_llvm_features(
                         enable_disable, llvm_feature.llvm_feature_name
                     ))
                     .chain(llvm_feature.dependency.into_iter().filter_map(
-                        move |feat| match (enable_disable, feat) {
-                            ('-' | '+', TargetFeatureFoldStrength::Both(f))
-                            | ('+', TargetFeatureFoldStrength::EnableOnly(f)) => {
+                        move |feat| match (enable, feat) {
+                            (_, TargetFeatureFoldStrength::Both(f))
+                            | (true, TargetFeatureFoldStrength::EnableOnly(f)) => {
                                 Some(format!("{enable_disable}{f}"))
                             }
                             _ => None,
@@ -780,22 +843,6 @@ pub(crate) fn global_llvm_features(
     features
 }
 
-/// Returns a feature name for the given `+feature` or `-feature` string.
-///
-/// Only allows features that are backend specific (i.e. not [`RUSTC_SPECIFIC_FEATURES`].)
-fn backend_feature_name<'a>(sess: &Session, s: &'a str) -> Option<&'a str> {
-    // features must start with a `+` or `-`.
-    let feature = s
-        .strip_prefix(&['+', '-'][..])
-        .unwrap_or_else(|| sess.dcx().emit_fatal(InvalidTargetFeaturePrefix { feature: s }));
-    // Rustc-specific feature requests like `+crt-static` or `-crt-static`
-    // are not passed down to LLVM.
-    if s.is_empty() || RUSTC_SPECIFIC_FEATURES.contains(&feature) {
-        return None;
-    }
-    Some(feature)
-}
-
 pub(crate) fn tune_cpu(sess: &Session) -> Option<&str> {
     let name = sess.opts.unstable_opts.tune_cpu.as_ref()?;
     Some(handle_native(name))
diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs
index 2b05e24a7ba..b0b6da869da 100644
--- a/compiler/rustc_codegen_llvm/src/type_of.rs
+++ b/compiler/rustc_codegen_llvm/src/type_of.rs
@@ -38,7 +38,7 @@ fn uncached_llvm_type<'a, 'tcx>(
             if let (&ty::Adt(def, _), &Variants::Single { index }) =
                 (layout.ty.kind(), &layout.variants)
             {
-                if def.is_enum() && !def.variants().is_empty() {
+                if def.is_enum() {
                     write!(&mut name, "::{}", def.variant(index).name).unwrap();
                 }
             }