about summary refs log tree commit diff
path: root/compiler/rustc_codegen_llvm/src
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
-rw-r--r--compiler/rustc_codegen_llvm/src/abi.rs3
-rw-r--r--compiler/rustc_codegen_llvm/src/allocator.rs4
-rw-r--r--compiler/rustc_codegen_llvm/src/asm.rs4
-rw-r--r--compiler/rustc_codegen_llvm/src/back/archive.rs11
-rw-r--r--compiler/rustc_codegen_llvm/src/back/lto.rs29
-rw-r--r--compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/back/write.rs253
-rw-r--r--compiler/rustc_codegen_llvm/src/builder.rs6
-rw-r--r--compiler/rustc_codegen_llvm/src/builder/autodiff.rs46
-rw-r--r--compiler/rustc_codegen_llvm/src/common.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/consts.rs6
-rw-r--r--compiler/rustc_codegen_llvm/src/context.rs10
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/declare.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/intrinsic.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/lib.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/ffi.rs3
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/mod.rs4
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm_util.rs3
-rw-r--r--compiler/rustc_codegen_llvm/src/type_.rs4
20 files changed, 190 insertions, 208 deletions
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 685b2f37c9c..28f423efc21 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -664,7 +664,7 @@ impl<'tcx> AbiBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
 }
 
 impl llvm::CallConv {
-    pub fn from_conv(conv: Conv, arch: &str) -> Self {
+    pub(crate) fn from_conv(conv: Conv, arch: &str) -> Self {
         match conv {
             Conv::C
             | Conv::Rust
@@ -687,7 +687,6 @@ impl llvm::CallConv {
             Conv::AvrNonBlockingInterrupt => llvm::AvrNonBlockingInterrupt,
             Conv::ArmAapcs => llvm::ArmAapcsCallConv,
             Conv::Msp430Intr => llvm::Msp430Intr,
-            Conv::PtxKernel => llvm::PtxKernel,
             Conv::X86Fastcall => llvm::X86FastcallCallConv,
             Conv::X86Intr => llvm::X86_Intr,
             Conv::X86Stdcall => llvm::X86StdcallCallConv,
diff --git a/compiler/rustc_codegen_llvm/src/allocator.rs b/compiler/rustc_codegen_llvm/src/allocator.rs
index 149ded28356..66723cbf882 100644
--- a/compiler/rustc_codegen_llvm/src/allocator.rs
+++ b/compiler/rustc_codegen_llvm/src/allocator.rs
@@ -81,13 +81,13 @@ pub(crate) unsafe fn codegen(
         llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
         let val = tcx.sess.opts.unstable_opts.oom.should_panic();
         let llval = llvm::LLVMConstInt(i8, val as u64, False);
-        llvm::LLVMSetInitializer(ll_g, llval);
+        llvm::set_initializer(ll_g, llval);
 
         let name = NO_ALLOC_SHIM_IS_UNSTABLE;
         let ll_g = llvm::LLVMRustGetOrInsertGlobal(llmod, name.as_c_char_ptr(), name.len(), i8);
         llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
         let llval = llvm::LLVMConstInt(i8, 0, False);
-        llvm::LLVMSetInitializer(ll_g, llval);
+        llvm::set_initializer(ll_g, llval);
     }
 
     if tcx.sess.opts.debuginfo != DebugInfo::None {
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index 3722d4350a2..be5673eddf9 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -286,7 +286,9 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 InlineAsmArch::M68k => {
                     constraints.push("~{ccr}".to_string());
                 }
-                InlineAsmArch::CSKY => {}
+                InlineAsmArch::CSKY => {
+                    constraints.push("~{psr}".to_string());
+                }
             }
         }
         if !options.contains(InlineAsmOptions::NOMEM) {
diff --git a/compiler/rustc_codegen_llvm/src/back/archive.rs b/compiler/rustc_codegen_llvm/src/back/archive.rs
index 33a956e552f..93553f3f364 100644
--- a/compiler/rustc_codegen_llvm/src/back/archive.rs
+++ b/compiler/rustc_codegen_llvm/src/back/archive.rs
@@ -11,7 +11,7 @@ use rustc_codegen_ssa::back::archive::{
 use rustc_session::Session;
 
 use crate::llvm::archive_ro::{ArchiveRO, Child};
-use crate::llvm::{self, ArchiveKind};
+use crate::llvm::{self, ArchiveKind, last_error};
 
 /// Helper for adding many files to an archive.
 #[must_use = "must call build() to finish building the archive"]
@@ -169,6 +169,8 @@ impl<'a> LlvmArchiveBuilder<'a> {
             .unwrap_or_else(|kind| self.sess.dcx().emit_fatal(UnknownArchiveKind { kind }));
 
         let mut additions = mem::take(&mut self.additions);
+        // Values in the `members` list below will contain pointers to the strings allocated here.
+        // So they need to get dropped after all elements of `members` get freed.
         let mut strings = Vec::new();
         let mut members = Vec::new();
 
@@ -229,12 +231,7 @@ impl<'a> LlvmArchiveBuilder<'a> {
                 self.sess.target.arch == "arm64ec",
             );
             let ret = if r.into_result().is_err() {
-                let err = llvm::LLVMRustGetLastError();
-                let msg = if err.is_null() {
-                    "failed to write archive".into()
-                } else {
-                    String::from_utf8_lossy(CStr::from_ptr(err).to_bytes())
-                };
+                let msg = last_error().unwrap_or_else(|| "failed to write archive".into());
                 Err(io::Error::new(io::ErrorKind::Other, msg))
             } else {
                 Ok(!members.is_empty())
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index 78c759bbe8c..7262fce4911 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -606,10 +606,31 @@ pub(crate) fn run_pass_manager(
 
     // If this rustc version was build with enzyme/autodiff enabled, and if users applied the
     // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time.
-    let first_run = true;
     debug!("running llvm pm opt pipeline");
     unsafe {
-        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?;
+        write::llvm_optimize(
+            cgcx,
+            dcx,
+            module,
+            config,
+            opt_level,
+            opt_stage,
+            write::AutodiffStage::DuringAD,
+        )?;
+    }
+    // FIXME(ZuseZ4): Make this more granular
+    if cfg!(llvm_enzyme) && !thin {
+        unsafe {
+            write::llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                config,
+                opt_level,
+                llvm::OptStage::FatLTO,
+                write::AutodiffStage::PostAD,
+            )?;
+        }
     }
     debug!("lto done");
     Ok(())
@@ -621,7 +642,7 @@ unsafe impl Send for ModuleBuffer {}
 unsafe impl Sync for ModuleBuffer {}
 
 impl ModuleBuffer {
-    pub fn new(m: &llvm::Module) -> ModuleBuffer {
+    pub(crate) fn new(m: &llvm::Module) -> ModuleBuffer {
         ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) })
     }
 }
@@ -663,7 +684,7 @@ unsafe impl Send for ThinBuffer {}
 unsafe impl Sync for ThinBuffer {}
 
 impl ThinBuffer {
-    pub fn new(m: &llvm::Module, is_thin: bool, emit_summary: bool) -> ThinBuffer {
+    pub(crate) fn new(m: &llvm::Module, is_thin: bool, emit_summary: bool) -> ThinBuffer {
         unsafe {
             let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin, emit_summary);
             ThinBuffer(buffer)
diff --git a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
index 4cbd49aa44d..f075f332462 100644
--- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
+++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
@@ -17,7 +17,7 @@ pub struct OwnedTargetMachine {
 }
 
 impl OwnedTargetMachine {
-    pub fn new(
+    pub(crate) fn new(
         triple: &CStr,
         cpu: &CStr,
         features: &CStr,
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 4706744f353..58933a77e53 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -40,7 +40,7 @@ use crate::errors::{
     WithLlvmError, WriteBytecode,
 };
 use crate::llvm::diagnostic::OptimizationDiagnosticKind::*;
-use crate::llvm::{self, DiagnosticInfo, PassManager};
+use crate::llvm::{self, DiagnosticInfo};
 use crate::type_::Type;
 use crate::{LlvmCodegenBackend, ModuleLlvm, base, common, llvm_util};
 
@@ -54,7 +54,7 @@ pub(crate) fn llvm_err<'a>(dcx: DiagCtxtHandle<'_>, err: LlvmError<'a>) -> Fatal
 fn write_output_file<'ll>(
     dcx: DiagCtxtHandle<'_>,
     target: &'ll llvm::TargetMachine,
-    pm: &llvm::PassManager<'ll>,
+    no_builtins: bool,
     m: &'ll llvm::Module,
     output: &Path,
     dwo_output: Option<&Path>,
@@ -63,16 +63,19 @@ fn write_output_file<'ll>(
     verify_llvm_ir: bool,
 ) -> Result<(), FatalError> {
     debug!("write_output_file output={:?} dwo_output={:?}", output, dwo_output);
-    unsafe {
-        let output_c = path_to_c_string(output);
-        let dwo_output_c;
-        let dwo_output_ptr = if let Some(dwo_output) = dwo_output {
-            dwo_output_c = path_to_c_string(dwo_output);
-            dwo_output_c.as_ptr()
-        } else {
-            std::ptr::null()
-        };
-        let result = llvm::LLVMRustWriteOutputFile(
+    let output_c = path_to_c_string(output);
+    let dwo_output_c;
+    let dwo_output_ptr = if let Some(dwo_output) = dwo_output {
+        dwo_output_c = path_to_c_string(dwo_output);
+        dwo_output_c.as_ptr()
+    } else {
+        std::ptr::null()
+    };
+    let result = unsafe {
+        let pm = llvm::LLVMCreatePassManager();
+        llvm::LLVMAddAnalysisPasses(target, pm);
+        llvm::LLVMRustAddLibraryInfo(pm, m, no_builtins);
+        llvm::LLVMRustWriteOutputFile(
             target,
             pm,
             m,
@@ -80,22 +83,22 @@ fn write_output_file<'ll>(
             dwo_output_ptr,
             file_type,
             verify_llvm_ir,
-        );
+        )
+    };
 
-        // Record artifact sizes for self-profiling
-        if result == llvm::LLVMRustResult::Success {
-            let artifact_kind = match file_type {
-                llvm::FileType::ObjectFile => "object_file",
-                llvm::FileType::AssemblyFile => "assembly_file",
-            };
-            record_artifact_size(self_profiler_ref, artifact_kind, output);
-            if let Some(dwo_file) = dwo_output {
-                record_artifact_size(self_profiler_ref, "dwo_file", dwo_file);
-            }
+    // Record artifact sizes for self-profiling
+    if result == llvm::LLVMRustResult::Success {
+        let artifact_kind = match file_type {
+            llvm::FileType::ObjectFile => "object_file",
+            llvm::FileType::AssemblyFile => "assembly_file",
+        };
+        record_artifact_size(self_profiler_ref, artifact_kind, output);
+        if let Some(dwo_file) = dwo_output {
+            record_artifact_size(self_profiler_ref, "dwo_file", dwo_file);
         }
-
-        result.into_result().map_err(|()| llvm_err(dcx, LlvmError::WriteOutput { path: output }))
     }
+
+    result.into_result().map_err(|()| llvm_err(dcx, LlvmError::WriteOutput { path: output }))
 }
 
 pub(crate) fn create_informational_target_machine(
@@ -325,13 +328,17 @@ pub(crate) fn save_temp_bitcode(
     if !cgcx.save_temps {
         return;
     }
+    let ext = format!("{name}.bc");
+    let cgu = Some(&module.name[..]);
+    let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
+    write_bitcode_to_file(module, &path)
+}
+
+fn write_bitcode_to_file(module: &ModuleCodegen<ModuleLlvm>, path: &Path) {
     unsafe {
-        let ext = format!("{name}.bc");
-        let cgu = Some(&module.name[..]);
-        let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
-        let cstr = path_to_c_string(&path);
+        let path = path_to_c_string(&path);
         let llmod = module.module_llvm.llmod();
-        llvm::LLVMWriteBitcodeToFile(llmod, cstr.as_ptr());
+        llvm::LLVMWriteBitcodeToFile(llmod, path.as_ptr());
     }
 }
 
@@ -530,6 +537,16 @@ fn get_instr_profile_output_path(config: &ModuleConfig) -> Option<CString> {
     config.instrument_coverage.then(|| c"default_%m_%p.profraw".to_owned())
 }
 
+// PreAD will run llvm opts but disable size increasing opts (vectorization, loop unrolling)
+// DuringAD is the same as above, but also runs the enzyme opt and autodiff passes.
+// PostAD will run all opts, including size increasing opts.
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum AutodiffStage {
+    PreAD,
+    DuringAD,
+    PostAD,
+}
+
 pub(crate) unsafe fn llvm_optimize(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
@@ -537,7 +554,7 @@ pub(crate) unsafe fn llvm_optimize(
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
-    skip_size_increasing_opts: bool,
+    autodiff_stage: AutodiffStage,
 ) -> Result<(), FatalError> {
     // Enzyme:
     // The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized
@@ -550,12 +567,16 @@ pub(crate) unsafe fn llvm_optimize(
     let unroll_loops;
     let vectorize_slp;
     let vectorize_loop;
+    let run_enzyme = cfg!(llvm_enzyme) && autodiff_stage == AutodiffStage::DuringAD;
 
     // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
-    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
+    // optimizations until after differentiation. Our pipeline is thus: (opt + enzyme), (full opt).
+    // We therefore have two calls to llvm_optimize, if autodiff is used.
+    //
+    // FIXME(ZuseZ4): Before shipping on nightly,
     // we should make this more granular, or at least check that the user has at least one autodiff
     // call in their code, to justify altering the compilation pipeline.
-    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+    if cfg!(llvm_enzyme) && autodiff_stage != AutodiffStage::PostAD {
         unroll_loops = false;
         vectorize_slp = false;
         vectorize_loop = false;
@@ -565,7 +586,7 @@ pub(crate) unsafe fn llvm_optimize(
         vectorize_slp = config.vectorize_slp;
         vectorize_loop = config.vectorize_loop;
     }
-    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
@@ -633,6 +654,7 @@ pub(crate) unsafe fn llvm_optimize(
             vectorize_loop,
             config.no_builtins,
             config.emit_lifetime_markers,
+            run_enzyme,
             sanitizer_options.as_ref(),
             pgo_gen_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
             pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
@@ -661,7 +683,6 @@ pub(crate) unsafe fn optimize(
 ) -> Result<(), FatalError> {
     let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_optimize", &*module.name);
 
-    let llmod = module.module_llvm.llmod();
     let llcx = &*module.module_llvm.llcx;
     let _handlers = DiagnosticHandlers::new(cgcx, dcx, llcx, module, CodegenDiagnosticsStage::Opt);
 
@@ -670,8 +691,7 @@ pub(crate) unsafe fn optimize(
 
     if config.emit_no_opt_bc {
         let out = cgcx.output_filenames.temp_path_ext("no-opt.bc", module_name);
-        let out = path_to_c_string(&out);
-        unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
+        write_bitcode_to_file(module, &out)
     }
 
     // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
@@ -684,18 +704,14 @@ pub(crate) unsafe fn optimize(
             _ => llvm::OptStage::PreLinkNoLTO,
         };
 
-        // If we know that we will later run AD, then we disable vectorization and loop unrolling
-        let skip_size_increasing_opts = cfg!(llvm_enzyme);
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling.
+        // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
+        // FIXME(ZuseZ4): Make this more granular, only set PreAD if we actually have autodiff
+        // usages, not just if we build rustc with autodiff support.
+        let autodiff_stage =
+            if cfg!(llvm_enzyme) { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
         return unsafe {
-            llvm_optimize(
-                cgcx,
-                dcx,
-                module,
-                config,
-                opt_level,
-                opt_stage,
-                skip_size_increasing_opts,
-            )
+            llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
         };
     }
     Ok(())
@@ -744,31 +760,6 @@ pub(crate) unsafe fn codegen(
             create_msvc_imps(cgcx, llcx, llmod);
         }
 
-        // A codegen-specific pass manager is used to generate object
-        // files for an LLVM module.
-        //
-        // Apparently each of these pass managers is a one-shot kind of
-        // thing, so we create a new one for each type of output. The
-        // pass manager passed to the closure should be ensured to not
-        // escape the closure itself, and the manager should only be
-        // used once.
-        unsafe fn with_codegen<'ll, F, R>(
-            tm: &'ll llvm::TargetMachine,
-            llmod: &'ll llvm::Module,
-            no_builtins: bool,
-            f: F,
-        ) -> R
-        where
-            F: FnOnce(&'ll mut PassManager<'ll>) -> R,
-        {
-            unsafe {
-                let cpm = llvm::LLVMCreatePassManager();
-                llvm::LLVMAddAnalysisPasses(tm, cpm);
-                llvm::LLVMRustAddLibraryInfo(cpm, llmod, no_builtins);
-                f(cpm)
-            }
-        }
-
         // Note that if object files are just LLVM bitcode we write bitcode,
         // copy it to the .o file, and delete the bitcode if it wasn't
         // otherwise requested.
@@ -887,21 +878,17 @@ pub(crate) unsafe fn codegen(
             } else {
                 llmod
             };
-            unsafe {
-                with_codegen(tm, llmod, config.no_builtins, |cpm| {
-                    write_output_file(
-                        dcx,
-                        tm,
-                        cpm,
-                        llmod,
-                        &path,
-                        None,
-                        llvm::FileType::AssemblyFile,
-                        &cgcx.prof,
-                        config.verify_llvm_ir,
-                    )
-                })?;
-            }
+            write_output_file(
+                dcx,
+                tm,
+                config.no_builtins,
+                llmod,
+                &path,
+                None,
+                llvm::FileType::AssemblyFile,
+                &cgcx.prof,
+                config.verify_llvm_ir,
+            )?;
         }
 
         match config.emit_obj {
@@ -925,21 +912,17 @@ pub(crate) unsafe fn codegen(
                     (_, SplitDwarfKind::Split) => Some(dwo_out.as_path()),
                 };
 
-                unsafe {
-                    with_codegen(tm, llmod, config.no_builtins, |cpm| {
-                        write_output_file(
-                            dcx,
-                            tm,
-                            cpm,
-                            llmod,
-                            &obj_out,
-                            dwo_out,
-                            llvm::FileType::ObjectFile,
-                            &cgcx.prof,
-                            config.verify_llvm_ir,
-                        )
-                    })?;
-                }
+                write_output_file(
+                    dcx,
+                    tm,
+                    config.no_builtins,
+                    llmod,
+                    &obj_out,
+                    dwo_out,
+                    llvm::FileType::ObjectFile,
+                    &cgcx.prof,
+                    config.verify_llvm_ir,
+                )?;
             }
 
             EmitObj::Bitcode => {
@@ -1066,24 +1049,18 @@ unsafe fn embed_bitcode(
         {
             // We don't need custom section flags, create LLVM globals.
             let llconst = common::bytes_in_context(llcx, bitcode);
-            let llglobal = llvm::LLVMAddGlobal(
-                llmod,
-                common::val_ty(llconst),
-                c"rustc.embedded.module".as_ptr(),
-            );
-            llvm::LLVMSetInitializer(llglobal, llconst);
+            let llglobal =
+                llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.module");
+            llvm::set_initializer(llglobal, llconst);
 
             llvm::set_section(llglobal, bitcode_section_name(cgcx));
             llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage);
             llvm::LLVMSetGlobalConstant(llglobal, llvm::True);
 
             let llconst = common::bytes_in_context(llcx, cmdline.as_bytes());
-            let llglobal = llvm::LLVMAddGlobal(
-                llmod,
-                common::val_ty(llconst),
-                c"rustc.embedded.cmdline".as_ptr(),
-            );
-            llvm::LLVMSetInitializer(llglobal, llconst);
+            let llglobal =
+                llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.cmdline");
+            llvm::set_initializer(llglobal, llconst);
             let section = if cgcx.target_is_like_osx {
                 c"__LLVM,__cmdline"
             } else if cgcx.target_is_like_aix {
@@ -1123,31 +1100,29 @@ fn create_msvc_imps(
     // underscores added in front).
     let prefix = if cgcx.target_arch == "x86" { "\x01__imp__" } else { "\x01__imp_" };
 
-    unsafe {
-        let ptr_ty = Type::ptr_llcx(llcx);
-        let globals = base::iter_globals(llmod)
-            .filter(|&val| {
-                llvm::get_linkage(val) == llvm::Linkage::ExternalLinkage
-                    && llvm::LLVMIsDeclaration(val) == 0
-            })
-            .filter_map(|val| {
-                // Exclude some symbols that we know are not Rust symbols.
-                let name = llvm::get_value_name(val);
-                if ignored(name) { None } else { Some((val, name)) }
-            })
-            .map(move |(val, name)| {
-                let mut imp_name = prefix.as_bytes().to_vec();
-                imp_name.extend(name);
-                let imp_name = CString::new(imp_name).unwrap();
-                (imp_name, val)
-            })
-            .collect::<Vec<_>>();
+    let ptr_ty = Type::ptr_llcx(llcx);
+    let globals = base::iter_globals(llmod)
+        .filter(|&val| {
+            llvm::get_linkage(val) == llvm::Linkage::ExternalLinkage && !llvm::is_declaration(val)
+        })
+        .filter_map(|val| {
+            // Exclude some symbols that we know are not Rust symbols.
+            let name = llvm::get_value_name(val);
+            if ignored(name) { None } else { Some((val, name)) }
+        })
+        .map(move |(val, name)| {
+            let mut imp_name = prefix.as_bytes().to_vec();
+            imp_name.extend(name);
+            let imp_name = CString::new(imp_name).unwrap();
+            (imp_name, val)
+        })
+        .collect::<Vec<_>>();
 
-        for (imp_name, val) in globals {
-            let imp = llvm::LLVMAddGlobal(llmod, ptr_ty, imp_name.as_ptr());
-            llvm::LLVMSetInitializer(imp, val);
-            llvm::set_linkage(imp, llvm::Linkage::ExternalLinkage);
-        }
+    for (imp_name, val) in globals {
+        let imp = llvm::add_global(llmod, ptr_ty, &imp_name);
+
+        llvm::set_initializer(imp, val);
+        llvm::set_linkage(imp, llvm::Linkage::ExternalLinkage);
     }
 
     // Use this function to exclude certain symbols from `__imp` generation.
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 264d43c6d46..acae0b444c0 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -543,7 +543,8 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unsafe {
             let alloca = llvm::LLVMBuildAlloca(bx.llbuilder, ty, UNNAMED);
             llvm::LLVMSetAlignment(alloca, align.bytes() as c_uint);
-            alloca
+            // Cast to default addrspace if necessary
+            llvm::LLVMBuildPointerCast(bx.llbuilder, alloca, self.cx().type_ptr(), UNNAMED)
         }
     }
 
@@ -552,7 +553,8 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             let alloca =
                 llvm::LLVMBuildArrayAlloca(self.llbuilder, self.cx().type_i8(), size, UNNAMED);
             llvm::LLVMSetAlignment(alloca, align.bytes() as c_uint);
-            alloca
+            // Cast to default addrspace if necessary
+            llvm::LLVMBuildPointerCast(self.llbuilder, alloca, self.cx().type_ptr(), UNNAMED)
         }
     }
 
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index dd5e726160d..b2c1088e3fc 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -4,10 +4,9 @@ use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivit
 use rustc_codegen_ssa::ModuleCodegen;
 use rustc_codegen_ssa::back::write::ModuleConfig;
 use rustc_errors::FatalError;
-use rustc_session::config::Lto;
 use tracing::{debug, trace};
 
-use crate::back::write::{llvm_err, llvm_optimize};
+use crate::back::write::llvm_err;
 use crate::builder::SBuilder;
 use crate::context::SimpleCx;
 use crate::declare::declare_simple_fn;
@@ -53,8 +52,6 @@ fn generate_enzyme_call<'ll>(
     let mut ad_name: String = match attrs.mode {
         DiffMode::Forward => "__enzyme_fwddiff",
         DiffMode::Reverse => "__enzyme_autodiff",
-        DiffMode::ForwardFirst => "__enzyme_fwddiff",
-        DiffMode::ReverseFirst => "__enzyme_autodiff",
         _ => panic!("logic bug in autodiff, unrecognized mode"),
     }
     .to_string();
@@ -153,7 +150,7 @@ fn generate_enzyme_call<'ll>(
             _ => {}
         }
 
-        trace!("matching autodiff arguments");
+        debug!("matching autodiff arguments");
         // We now handle the issue that Rust level arguments not always match the llvm-ir level
         // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
         // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
@@ -164,10 +161,10 @@ fn generate_enzyme_call<'ll>(
         let mut activity_pos = 0;
         let outer_args: Vec<&llvm::Value> = get_params(outer_fn);
         while activity_pos < inputs.len() {
-            let activity = inputs[activity_pos as usize];
+            let diff_activity = inputs[activity_pos as usize];
             // Duplicated arguments received a shadow argument, into which enzyme will write the
             // gradient.
-            let (activity, duplicated): (&Metadata, bool) = match activity {
+            let (activity, duplicated): (&Metadata, bool) = match diff_activity {
                 DiffActivity::None => panic!("not a valid input activity"),
                 DiffActivity::Const => (enzyme_const, false),
                 DiffActivity::Active => (enzyme_out, false),
@@ -222,7 +219,15 @@ fn generate_enzyme_call<'ll>(
                     // A duplicated pointer will have the following two outer_fn arguments:
                     // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
                     // (..., metadata! enzyme_dup, ptr, ptr, ...).
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer);
+                    if matches!(
+                        diff_activity,
+                        DiffActivity::Duplicated | DiffActivity::DuplicatedOnly
+                    ) {
+                        assert!(
+                            llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer
+                        );
+                    }
+                    // In the case of Dual we don't have assumptions, e.g. f32 would be valid.
                     args.push(next_outer_arg);
                     outer_pos += 2;
                     activity_pos += 1;
@@ -277,7 +282,7 @@ pub(crate) fn differentiate<'ll>(
     module: &'ll ModuleCodegen<ModuleLlvm>,
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     diff_items: Vec<AutoDiffItem>,
-    config: &ModuleConfig,
+    _config: &ModuleConfig,
 ) -> Result<(), FatalError> {
     for item in &diff_items {
         trace!("{}", item);
@@ -318,29 +323,6 @@ pub(crate) fn differentiate<'ll>(
 
     // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
 
-    if let Some(opt_level) = config.opt_level {
-        let opt_stage = match cgcx.lto {
-            Lto::Fat => llvm::OptStage::PreLinkFatLTO,
-            Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
-            _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
-            _ => llvm::OptStage::PreLinkNoLTO,
-        };
-        // This is our second opt call, so now we run all opts,
-        // to make sure we get the best performance.
-        let skip_size_increasing_opts = false;
-        trace!("running Module Optimization after differentiation");
-        unsafe {
-            llvm_optimize(
-                cgcx,
-                diag_handler.handle(),
-                module,
-                config,
-                opt_level,
-                opt_stage,
-                skip_size_increasing_opts,
-            )?
-        };
-    }
     trace!("done with differentiate()");
 
     Ok(())
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index 8c94a46ebf3..78b3a7f8541 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -219,8 +219,8 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                 let g = self.define_global(&sym, self.val_ty(sc)).unwrap_or_else(|| {
                     bug!("symbol `{}` is already defined", sym);
                 });
+                llvm::set_initializer(g, sc);
                 unsafe {
-                    llvm::LLVMSetInitializer(g, sc);
                     llvm::LLVMSetGlobalConstant(g, True);
                     llvm::LLVMSetUnnamedAddress(g, llvm::UnnamedAddr::Global);
                 }
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index c6855dd42e5..4a5491ec7a1 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -191,7 +191,7 @@ fn check_and_apply_linkage<'ll, 'tcx>(
             })
         });
         llvm::set_linkage(g2, llvm::Linkage::InternalLinkage);
-        unsafe { llvm::LLVMSetInitializer(g2, g1) };
+        llvm::set_initializer(g2, g1);
         g2
     } else if cx.tcx.sess.target.arch == "x86"
         && common::is_mingw_gnu_toolchain(&cx.tcx.sess.target)
@@ -235,7 +235,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             }
             _ => self.define_private_global(self.val_ty(cv)),
         };
-        unsafe { llvm::LLVMSetInitializer(gv, cv) };
+        llvm::set_initializer(gv, cv);
         set_global_alignment(self, gv, align);
         llvm::SetUnnamedAddress(gv, llvm::UnnamedAddr::Global);
         gv
@@ -458,7 +458,7 @@ impl<'ll> CodegenCx<'ll, '_> {
                 new_g
             };
             set_global_alignment(self, g, alloc.align);
-            llvm::LLVMSetInitializer(g, v);
+            llvm::set_initializer(g, v);
 
             if self.should_assume_dso_local(g, true) {
                 llvm::LLVMRustSetDSOLocal(g, true);
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index ba4fd75fb94..7fe527a4c07 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -616,12 +616,10 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     pub(crate) fn create_used_variable_impl(&self, name: &'static CStr, values: &[&'ll Value]) {
         let array = self.const_array(self.type_ptr(), values);
 
-        unsafe {
-            let g = llvm::LLVMAddGlobal(self.llmod, self.val_ty(array), name.as_ptr());
-            llvm::LLVMSetInitializer(g, array);
-            llvm::set_linkage(g, llvm::Linkage::AppendingLinkage);
-            llvm::set_section(g, c"llvm.metadata");
-        }
+        let g = llvm::add_global(self.llmod, self.val_ty(array), name);
+        llvm::set_initializer(g, array);
+        llvm::set_linkage(g, llvm::Linkage::AppendingLinkage);
+        llvm::set_section(g, c"llvm.metadata");
     }
 }
 impl<'ll> SimpleCx<'ll> {
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
index 2c9f1cda13a..54c5d445f66 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
@@ -73,7 +73,7 @@ pub(crate) fn get_or_insert_gdb_debug_scripts_section_global<'ll>(
                 .define_global(section_var_name, llvm_type)
                 .unwrap_or_else(|| bug!("symbol `{}` is already defined", section_var_name));
             llvm::set_section(section_var, c".debug_gdb_scripts");
-            llvm::LLVMSetInitializer(section_var, cx.const_bytes(section_contents));
+            llvm::set_initializer(section_var, cx.const_bytes(section_contents));
             llvm::LLVMSetGlobalConstant(section_var, llvm::True);
             llvm::LLVMSetUnnamedAddress(section_var, llvm::UnnamedAddr::Global);
             llvm::set_linkage(section_var, llvm::Linkage::LinkOnceODRLinkage);
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index bdc83267cca..cebceef1b93 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -235,7 +235,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     /// name.
     pub(crate) fn get_defined_value(&self, name: &str) -> Option<&'ll Value> {
         self.get_declared_value(name).and_then(|val| {
-            let declaration = unsafe { llvm::LLVMIsDeclaration(val) != 0 };
+            let declaration = llvm::is_declaration(val);
             if !declaration { Some(val) } else { None }
         })
     }
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 3200c94d977..8b976885904 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -824,7 +824,7 @@ fn codegen_msvc_try<'ll>(
         if bx.cx.tcx.sess.target.supports_comdat() {
             llvm::SetUniqueComdat(bx.llmod, tydesc);
         }
-        unsafe { llvm::LLVMSetInitializer(tydesc, type_info) };
+        llvm::set_initializer(tydesc, type_info);
 
         // The flag value of 8 indicates that we are catching the exception by
         // reference instead of by value. We can't use catch by value because
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index f0d04b2b644..bcb1cc72b56 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -30,7 +30,7 @@ use std::mem::ManuallyDrop;
 use back::owned_target_machine::OwnedTargetMachine;
 use back::write::{create_informational_target_machine, create_target_machine};
 use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig};
-pub use llvm_util::target_features_cfg;
+pub(crate) use llvm_util::target_features_cfg;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 50a40c9c309..441d144ce50 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -2359,7 +2359,7 @@ unsafe extern "C" {
     );
     pub fn LLVMRustWriteOutputFile<'a>(
         T: &'a TargetMachine,
-        PM: &PassManager<'a>,
+        PM: *mut PassManager<'a>,
         M: &'a Module,
         Output: *const c_char,
         DwoOutput: *const c_char,
@@ -2382,6 +2382,7 @@ unsafe extern "C" {
         LoopVectorize: bool,
         DisableSimplifyLibCalls: bool,
         EmitLifetimeMarkers: bool,
+        RunEnzyme: bool,
         SanitizerOptions: Option<&SanitizerOptions>,
         PGOGenPath: *const c_char,
         PGOUsePath: *const c_char,
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index 707aeba22cc..7becba4ccd4 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -241,6 +241,10 @@ pub fn set_linkage(llglobal: &Value, linkage: Linkage) {
     }
 }
 
+pub fn is_declaration(llglobal: &Value) -> bool {
+    unsafe { LLVMIsDeclaration(llglobal) == ffi::True }
+}
+
 pub fn get_visibility(llglobal: &Value) -> Visibility {
     unsafe { LLVMGetVisibility(llglobal) }.to_rust()
 }
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 53611c746a7..b4b5d6a5b19 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -271,6 +271,7 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("aarch64", "fp16") => Some(LLVMFeature::new("fullfp16")),
         // Filter out features that are not supported by the current LLVM version
         ("aarch64", "fpmr") if get_version().0 != 18 => None,
+        ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
         // In LLVM 18, `unaligned-scalar-mem` was merged with `unaligned-vector-mem` into a single
         // feature called `fast-unaligned-access`. In LLVM 19, it was split back out.
         ("riscv32" | "riscv64", "unaligned-scalar-mem") if get_version().0 == 18 => {
@@ -303,7 +304,7 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
 /// Must express features in the way Rust understands them.
 ///
 /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
-pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
+pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
     let mut features: FxHashSet<Symbol> = Default::default();
 
     // Add base features for the target.
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index 34cede54aaf..d61ce417562 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -237,11 +237,11 @@ impl<'ll, 'tcx> BaseTypeCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
 
 impl Type {
     /// Creates an integer type with the given number of bits, e.g., i24
-    pub fn ix_llcx(llcx: &llvm::Context, num_bits: u64) -> &Type {
+    pub(crate) fn ix_llcx(llcx: &llvm::Context, num_bits: u64) -> &Type {
         unsafe { llvm::LLVMIntTypeInContext(llcx, num_bits as c_uint) }
     }
 
-    pub fn ptr_llcx(llcx: &llvm::Context) -> &Type {
+    pub(crate) fn ptr_llcx(llcx: &llvm::Context) -> &Type {
         unsafe { llvm::LLVMPointerTypeInContext(llcx, AddressSpace::DATA.0) }
     }
 }