Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
-rw-r--r--  compiler/rustc_codegen_llvm/src/attributes.rs                    17
-rw-r--r--  compiler/rustc_codegen_llvm/src/back/write.rs                      9
-rw-r--r--  compiler/rustc_codegen_llvm/src/builder.rs                       154
-rw-r--r--  compiler/rustc_codegen_llvm/src/builder/autodiff.rs               39
-rw-r--r--  compiler/rustc_codegen_llvm/src/common.rs                         20
-rw-r--r--  compiler/rustc_codegen_llvm/src/consts.rs                         61
-rw-r--r--  compiler/rustc_codegen_llvm/src/context.rs                        66
-rw-r--r--  compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs             6
-rw-r--r--  compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs      2
-rw-r--r--  compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs                8
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs     59
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs             40
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs     6
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/mod.rs                  55
-rw-r--r--  compiler/rustc_codegen_llvm/src/declare.rs                        28
-rw-r--r--  compiler/rustc_codegen_llvm/src/intrinsic.rs                     211
-rw-r--r--  compiler/rustc_codegen_llvm/src/lib.rs                             3
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs                 4
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/ffi.rs                       98
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm_util.rs                      56
-rw-r--r--  compiler/rustc_codegen_llvm/src/type_.rs                          13
21 files changed, 629 insertions, 326 deletions
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index 95e0481b035..3d7afa17bdf 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -333,9 +333,12 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
     let mut to_add = SmallVec::<[_; 16]>::new();
 
     match codegen_fn_attrs.optimize {
-        OptimizeAttr::None => {
+        OptimizeAttr::Default => {
             to_add.extend(default_optimisation_attrs(cx));
         }
+        OptimizeAttr::DoNotOptimize => {
+            to_add.push(llvm::AttributeKind::OptimizeNone.create_attr(cx.llcx));
+        }
         OptimizeAttr::Size => {
             to_add.push(llvm::AttributeKind::MinSize.create_attr(cx.llcx));
             to_add.push(llvm::AttributeKind::OptimizeForSize.create_attr(cx.llcx));
@@ -343,12 +346,12 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         OptimizeAttr::Speed => {}
     }
 
-    let inline =
-        if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
-            InlineAttr::Hint
-        } else {
-            codegen_fn_attrs.inline
-        };
+    // `optnone` requires `noinline`
+    let inline = match (codegen_fn_attrs.inline, &codegen_fn_attrs.optimize) {
+        (_, OptimizeAttr::DoNotOptimize) => InlineAttr::Never,
+        (InlineAttr::None, _) if instance.def.requires_inline(cx.tcx) => InlineAttr::Hint,
+        (inline, _) => inline,
+    };
     to_add.extend(inline_attr(cx, inline));
 
     // The `uwtable` attribute according to LLVM is:
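
Context for the attributes.rs hunk above: LLVM's verifier requires that any function carrying `optnone` also carry `noinline`, so the new `OptimizeAttr::DoNotOptimize` arm forces the inline attribute to `InlineAttr::Never`. A minimal sketch of the user-facing side (assuming the unstable `optimize_attribute` feature accepts `none`, which is what `DoNotOptimize` models; illustrative only, not taken from this diff):

    #![feature(optimize_attribute)]

    // Expected to be emitted with both the `optnone` and `noinline` LLVM
    // attributes after this change, and therefore never inlined into callers.
    #[optimize(none)]
    fn keep_unoptimized(x: u32) -> u32 {
        x + 1
    }
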
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 509b24dd703..4706744f353 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -769,12 +769,9 @@ pub(crate) unsafe fn codegen(
             }
         }
 
-        // Two things to note:
-        // - If object files are just LLVM bitcode we write bitcode, copy it to
-        //   the .o file, and delete the bitcode if it wasn't otherwise
-        //   requested.
-        // - If we don't have the integrated assembler then we need to emit
-        //   asm from LLVM and use `gcc` to create the object file.
+        // Note that if object files are just LLVM bitcode we write bitcode,
+        // copy it to the .o file, and delete the bitcode if it wasn't
+        // otherwise requested.
 
         let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
         let bc_summary_out =
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 5a34b52e6ef..2d007416263 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1,4 +1,4 @@
-use std::borrow::Cow;
+use std::borrow::{Borrow, Cow};
 use std::ops::Deref;
 use std::{iter, ptr};
 
@@ -31,20 +31,22 @@ use tracing::{debug, instrument};
 use crate::abi::FnAbiLlvmExt;
 use crate::attributes;
 use crate::common::Funclet;
-use crate::context::CodegenCx;
+use crate::context::{CodegenCx, SimpleCx};
 use crate::llvm::{self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, False, True};
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
 
-// All Builders must have an llfn associated with them
 #[must_use]
-pub(crate) struct Builder<'a, 'll, 'tcx> {
+pub(crate) struct GenericBuilder<'a, 'll, CX: Borrow<SimpleCx<'ll>>> {
     pub llbuilder: &'ll mut llvm::Builder<'ll>,
-    pub cx: &'a CodegenCx<'ll, 'tcx>,
+    pub cx: &'a CX,
 }
 
-impl Drop for Builder<'_, '_, '_> {
+pub(crate) type SBuilder<'a, 'll> = GenericBuilder<'a, 'll, SimpleCx<'ll>>;
+pub(crate) type Builder<'a, 'll, 'tcx> = GenericBuilder<'a, 'll, CodegenCx<'ll, 'tcx>>;
+
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> Drop for GenericBuilder<'a, 'll, CX> {
     fn drop(&mut self) {
         unsafe {
             llvm::LLVMDisposeBuilder(&mut *(self.llbuilder as *mut _));
@@ -52,6 +54,112 @@ impl Drop for Builder<'_, '_, '_> {
     }
 }
 
+impl<'a, 'll> SBuilder<'a, 'll> {
+    fn call(
+        &mut self,
+        llty: &'ll Type,
+        llfn: &'ll Value,
+        args: &[&'ll Value],
+        funclet: Option<&Funclet<'ll>>,
+    ) -> &'ll Value {
+        debug!("call {:?} with args ({:?})", llfn, args);
+
+        let args = self.check_call("call", llty, llfn, args);
+        let funclet_bundle = funclet.map(|funclet| funclet.bundle());
+        let mut bundles: SmallVec<[_; 2]> = SmallVec::new();
+        if let Some(funclet_bundle) = funclet_bundle {
+            bundles.push(funclet_bundle);
+        }
+
+        let call = unsafe {
+            llvm::LLVMBuildCallWithOperandBundles(
+                self.llbuilder,
+                llty,
+                llfn,
+                args.as_ptr() as *const &llvm::Value,
+                args.len() as c_uint,
+                bundles.as_ptr(),
+                bundles.len() as c_uint,
+                c"".as_ptr(),
+            )
+        };
+        call
+    }
+
+    fn with_scx(scx: &'a SimpleCx<'ll>) -> Self {
+        // Create a fresh builder from the simple context.
+        let llbuilder = unsafe { llvm::LLVMCreateBuilderInContext(scx.llcx) };
+        SBuilder { llbuilder, cx: scx }
+    }
+}
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
+    pub(crate) fn bitcast(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        unsafe { llvm::LLVMBuildBitCast(self.llbuilder, val, dest_ty, UNNAMED) }
+    }
+
+    fn ret_void(&mut self) {
+        unsafe {
+            llvm::LLVMBuildRetVoid(self.llbuilder);
+        }
+    }
+
+    fn ret(&mut self, v: &'ll Value) {
+        unsafe {
+            llvm::LLVMBuildRet(self.llbuilder, v);
+        }
+    }
+}
+impl<'a, 'll> SBuilder<'a, 'll> {
+    fn build(cx: &'a SimpleCx<'ll>, llbb: &'ll BasicBlock) -> SBuilder<'a, 'll> {
+        let bx = SBuilder::with_scx(cx);
+        unsafe {
+            llvm::LLVMPositionBuilderAtEnd(bx.llbuilder, llbb);
+        }
+        bx
+    }
+
+    fn check_call<'b>(
+        &mut self,
+        typ: &str,
+        fn_ty: &'ll Type,
+        llfn: &'ll Value,
+        args: &'b [&'ll Value],
+    ) -> Cow<'b, [&'ll Value]> {
+        assert!(
+            self.cx.type_kind(fn_ty) == TypeKind::Function,
+            "builder::{typ} not passed a function, but {fn_ty:?}"
+        );
+
+        let param_tys = self.cx.func_params_types(fn_ty);
+
+        let all_args_match = iter::zip(&param_tys, args.iter().map(|&v| self.cx.val_ty(v)))
+            .all(|(expected_ty, actual_ty)| *expected_ty == actual_ty);
+
+        if all_args_match {
+            return Cow::Borrowed(args);
+        }
+
+        let casted_args: Vec<_> = iter::zip(param_tys, args)
+            .enumerate()
+            .map(|(i, (expected_ty, &actual_val))| {
+                let actual_ty = self.cx.val_ty(actual_val);
+                if expected_ty != actual_ty {
+                    debug!(
+                        "type mismatch in function call of {:?}. \
+                            Expected {:?} for param {}, got {:?}; injecting bitcast",
+                        llfn, expected_ty, i, actual_ty
+                    );
+                    self.bitcast(actual_val, expected_ty)
+                } else {
+                    actual_val
+                }
+            })
+            .collect();
+
+        Cow::Owned(casted_args)
+    }
+}
+
 /// Empty string, to be used where LLVM expects an instruction name, indicating
 /// that the instruction is to be left unnamed (i.e. numbered, in textual IR).
 // FIXME(eddyb) pass `&CStr` directly to FFI once it's a thin pointer.
@@ -1217,11 +1325,21 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 impl<'ll> StaticBuilderMethods for Builder<'_, 'll, '_> {
     fn get_static(&mut self, def_id: DefId) -> &'ll Value {
         // Forward to the `get_static` method of `CodegenCx`
-        self.cx().get_static(def_id)
+        let s = self.cx().get_static(def_id);
+        // Cast to default address space if globals are in a different addrspace
+        self.cx().const_pointercast(s, self.type_ptr())
     }
 }
 
 impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
+    fn build(cx: &'a CodegenCx<'ll, 'tcx>, llbb: &'ll BasicBlock) -> Builder<'a, 'll, 'tcx> {
+        let bx = Builder::with_cx(cx);
+        unsafe {
+            llvm::LLVMPositionBuilderAtEnd(bx.llbuilder, llbb);
+        }
+        bx
+    }
+
     fn with_cx(cx: &'a CodegenCx<'ll, 'tcx>) -> Self {
         // Create a fresh builder from the crate context.
         let llbuilder = unsafe { llvm::LLVMCreateBuilderInContext(cx.llcx) };
@@ -1231,13 +1349,16 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     pub(crate) fn llfn(&self) -> &'ll Value {
         unsafe { llvm::LLVMGetBasicBlockParent(self.llbb()) }
     }
+}
 
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     fn position_at_start(&mut self, llbb: &'ll BasicBlock) {
         unsafe {
             llvm::LLVMRustPositionBuilderAtStart(self.llbuilder, llbb);
         }
     }
-
+}
+impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     fn align_metadata(&mut self, load: &'ll Value, align: Align) {
         unsafe {
             let md = [llvm::LLVMValueAsMetadata(self.cx.const_u64(align.bytes()))];
@@ -1259,7 +1380,8 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
             self.set_metadata(inst, llvm::MD_unpredictable, md);
         }
     }
-
+}
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     pub(crate) fn minnum(&mut self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
         unsafe { llvm::LLVMRustBuildMinNum(self.llbuilder, lhs, rhs) }
     }
@@ -1360,7 +1482,9 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         let ret = unsafe { llvm::LLVMBuildCatchRet(self.llbuilder, funclet.cleanuppad(), unwind) };
         ret.expect("LLVM does not have support for catchret")
     }
+}
 
+impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     fn check_call<'b>(
         &mut self,
         typ: &str,
@@ -1401,11 +1525,13 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
         Cow::Owned(casted_args)
     }
-
+}
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     pub(crate) fn va_arg(&mut self, list: &'ll Value, ty: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMBuildVAArg(self.llbuilder, list, ty, UNNAMED) }
     }
-
+}
+impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     pub(crate) fn call_intrinsic(&mut self, intrinsic: &str, args: &[&'ll Value]) -> &'ll Value {
         let (ty, f) = self.cx.get_intrinsic(intrinsic);
         self.call(ty, None, None, f, args, None, None)
@@ -1423,7 +1549,8 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
         self.call_intrinsic(intrinsic, &[self.cx.const_u64(size), ptr]);
     }
-
+}
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     pub(crate) fn phi(
         &mut self,
         ty: &'ll Type,
@@ -1443,7 +1570,8 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
             llvm::LLVMAddIncoming(phi, &val, &bb, 1 as c_uint);
         }
     }
-
+}
+impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     fn fptoint_sat(&mut self, signed: bool, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         let src_ty = self.cx.val_ty(val);
         let (float_ty, int_ty, vector_length) = if self.cx.type_kind(src_ty) == TypeKind::Vector {
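
The builder.rs changes above split the concrete `Builder` into a `GenericBuilder` parameterized over any `CX: Borrow<SimpleCx<'ll>>`, with `SBuilder` (backed by the tcx-free `SimpleCx`) and the existing `Builder` (backed by `CodegenCx`) as aliases. A rough sketch of how the simple flavour is used, modelled on the autodiff changes further down (variable names are illustrative, not a verbatim excerpt):

    // A tcx-free context wrapping an existing LLVM module/context pair.
    let scx = SimpleCx { llmod, llcx };
    // Position a simple builder at the end of some basic block...
    let mut bx = SBuilder::build(&scx, entry_bb);
    // ...and emit a call plus a return without needing a full CodegenCx.
    let call = bx.call(fn_ty, llfn, &args, None);
    bx.ret_void();
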
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index 38f7eaa090f..9e8e4e1c567 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -3,20 +3,19 @@ use std::ptr;
 use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode};
 use rustc_codegen_ssa::ModuleCodegen;
 use rustc_codegen_ssa::back::write::ModuleConfig;
-use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods};
 use rustc_errors::FatalError;
-use rustc_middle::ty::TyCtxt;
 use rustc_session::config::Lto;
 use tracing::{debug, trace};
 
 use crate::back::write::{llvm_err, llvm_optimize};
-use crate::builder::Builder;
-use crate::declare::declare_raw_fn;
+use crate::builder::SBuilder;
+use crate::context::SimpleCx;
+use crate::declare::declare_simple_fn;
 use crate::errors::LlvmError;
 use crate::llvm::AttributePlace::Function;
 use crate::llvm::{Metadata, True};
 use crate::value::Value;
-use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, context, llvm};
+use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, llvm};
 
 fn get_params(fnc: &Value) -> Vec<&Value> {
     unsafe {
@@ -38,8 +37,8 @@ fn get_params(fnc: &Value) -> Vec<&Value> {
 /// [^1]: <https://enzyme.mit.edu/getting_started/CallingConvention/>
 // FIXME(ZuseZ4): `outer_fn` should include upstream safety checks to
 // cover some assumptions of enzyme/autodiff, which could lead to UB otherwise.
-fn generate_enzyme_call<'ll, 'tcx>(
-    cx: &context::CodegenCx<'ll, 'tcx>,
+fn generate_enzyme_call<'ll>(
+    cx: &SimpleCx<'ll>,
     fn_to_diff: &'ll Value,
     outer_fn: &'ll Value,
     attrs: AutoDiffAttrs,
@@ -63,8 +62,8 @@ fn generate_enzyme_call<'ll, 'tcx>(
     // add outer_fn name to ad_name to make it unique, in case users apply autodiff to multiple
     // functions. Unwrap will only panic, if LLVM gave us an invalid string.
     let name = llvm::get_value_name(outer_fn);
-    let outer_fn_name = std::ffi::CStr::from_bytes_with_nul(name).unwrap().to_str().unwrap();
-    ad_name.push_str(outer_fn_name.to_string().as_str());
+    let outer_fn_name = std::str::from_utf8(name).unwrap();
+    ad_name.push_str(outer_fn_name);
 
     // Let us assume the user wrote the following function square:
     //
@@ -112,7 +111,7 @@ fn generate_enzyme_call<'ll, 'tcx>(
         //FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and
         // think a bit more about what should go here.
         let cc = llvm::LLVMGetFunctionCallConv(outer_fn);
-        let ad_fn = declare_raw_fn(
+        let ad_fn = declare_simple_fn(
             cx,
             &ad_name,
             llvm::CallConv::try_from(cc).expect("invalid callconv"),
@@ -132,7 +131,7 @@ fn generate_enzyme_call<'ll, 'tcx>(
         llvm::LLVMRustEraseInstFromParent(br);
 
         let last_inst = llvm::LLVMRustGetLastInstruction(entry).unwrap();
-        let mut builder = Builder::build(cx, entry);
+        let mut builder = SBuilder::build(cx, entry);
 
         let num_args = llvm::LLVMCountParams(&fn_to_diff);
         let mut args = Vec::with_capacity(num_args as usize + 1);
@@ -236,7 +235,7 @@ fn generate_enzyme_call<'ll, 'tcx>(
             }
         }
 
-        let call = builder.call(enzyme_ty, None, None, ad_fn, &args, None, None);
+        let call = builder.call(enzyme_ty, ad_fn, &args, None);
 
         // This part is a bit iffy. LLVM requires that a call to an inlineable function has some
         // metadata attachted to it, but we just created this code oota. Given that the
@@ -256,14 +255,14 @@ fn generate_enzyme_call<'ll, 'tcx>(
             // have no debug info to copy, which would then be ok.
             trace!("no dbg info");
         }
+
         // Now that we copied the metadata, get rid of dummy code.
-        llvm::LLVMRustEraseInstBefore(entry, last_inst);
-        llvm::LLVMRustEraseInstFromParent(last_inst);
+        llvm::LLVMRustEraseInstUntilInclusive(entry, last_inst);
 
-        if cx.val_ty(outer_fn) != cx.type_void() {
-            builder.ret(call);
-        } else {
+        if cx.val_ty(call) == cx.type_void() {
             builder.ret_void();
+        } else {
+            builder.ret(call);
         }
 
         // Let's crash in case that we messed something up above and generated invalid IR.
@@ -274,10 +273,9 @@ fn generate_enzyme_call<'ll, 'tcx>(
     }
 }
 
-pub(crate) fn differentiate<'ll, 'tcx>(
+pub(crate) fn differentiate<'ll>(
     module: &'ll ModuleCodegen<ModuleLlvm>,
     cgcx: &CodegenContext<LlvmCodegenBackend>,
-    tcx: TyCtxt<'tcx>,
     diff_items: Vec<AutoDiffItem>,
     config: &ModuleConfig,
 ) -> Result<(), FatalError> {
@@ -286,8 +284,7 @@ pub(crate) fn differentiate<'ll, 'tcx>(
     }
 
     let diag_handler = cgcx.create_dcx();
-    let (_, cgus) = tcx.collect_and_partition_mono_items(());
-    let cx = context::CodegenCx::new(tcx, &cgus.first().unwrap(), &module.module_llvm);
+    let cx = SimpleCx { llmod: module.module_llvm.llmod(), llcx: module.module_llvm.llcx };
 
     // Before dumping the module, we want all the TypeTrees to become part of the module.
     for item in diff_items.iter() {
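
For orientation, `generate_enzyme_call` wires up the Enzyme machinery for functions the user marked for differentiation; after this change it only needs a `SimpleCx` instead of a full `CodegenCx` with a `TyCtxt`. A hedged sketch of the user-level surface this serves (the attribute syntax is unstable and on some nightlies the macro must be imported from `std::autodiff`; treat this purely as an illustration):

    #![feature(autodiff)]

    // `d_square` is the generated stub whose body Enzyme later fills in by
    // differentiating `square` in reverse mode.
    #[autodiff(d_square, Reverse, Active, Active)]
    fn square(x: f64) -> f64 {
        x * x
    }
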
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index adfe8aeb5c5..8c94a46ebf3 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -126,6 +126,10 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         unsafe { llvm::LLVMGetUndef(t) }
     }
 
+    fn is_undef(&self, v: &'ll Value) -> bool {
+        unsafe { llvm::LLVMIsUndef(v) == True }
+    }
+
     fn const_poison(&self, t: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMGetPoison(t) }
     }
@@ -221,6 +225,8 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                     llvm::LLVMSetUnnamedAddress(g, llvm::UnnamedAddr::Global);
                 }
                 llvm::set_linkage(g, llvm::Linkage::InternalLinkage);
+                // Cast to default address space if globals are in a different addrspace
+                let g = self.const_pointercast(g, self.type_ptr());
                 (s.to_owned(), g)
             })
             .1;
@@ -285,7 +291,7 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                             let alloc = alloc.inner();
                             let value = match alloc.mutability {
                                 Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None),
-                                _ => self.static_addr_of(init, alloc.align, None),
+                                _ => self.static_addr_of_impl(init, alloc.align, None),
                             };
                             if !self.sess().fewer_names() && llvm::get_value_name(value).is_empty()
                             {
@@ -308,10 +314,15 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                     GlobalAlloc::VTable(ty, dyn_ty) => {
                         let alloc = self
                             .tcx
-                            .global_alloc(self.tcx.vtable_allocation((ty, dyn_ty.principal())))
+                            .global_alloc(self.tcx.vtable_allocation((
+                                ty,
+                                dyn_ty.principal().map(|principal| {
+                                    self.tcx.instantiate_bound_regions_with_erased(principal)
+                                }),
+                            )))
                             .unwrap_memory();
                         let init = const_alloc_to_llvm(self, alloc, /*static*/ false);
-                        let value = self.static_addr_of(init, alloc.inner().align, None);
+                        let value = self.static_addr_of_impl(init, alloc.inner().align, None);
                         (value, AddressSpace::DATA)
                     }
                     GlobalAlloc::Static(def_id) => {
@@ -323,7 +334,8 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                 let llval = unsafe {
                     llvm::LLVMConstInBoundsGEP2(
                         self.type_i8(),
-                        self.const_bitcast(base_addr, self.type_ptr_ext(base_addr_space)),
+                        // Cast to the required address space if necessary
+                        self.const_pointercast(base_addr, self.type_ptr_ext(base_addr_space)),
                         &self.const_usize(offset.bytes()),
                         1,
                     )
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index c7114480d8b..771ebf2057f 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -210,6 +210,14 @@ impl<'ll> CodegenCx<'ll, '_> {
         unsafe { llvm::LLVMConstBitCast(val, ty) }
     }
 
+    pub(crate) fn const_pointercast(&self, val: &'ll Value, ty: &'ll Type) -> &'ll Value {
+        unsafe { llvm::LLVMConstPointerCast(val, ty) }
+    }
+
+    /// Create a global variable.
+    ///
+    /// The returned global variable is a pointer in the default address space for globals.
+    /// Fails if a symbol with the given name already exists.
     pub(crate) fn static_addr_of_mut(
         &self,
         cv: &'ll Value,
@@ -233,6 +241,34 @@ impl<'ll> CodegenCx<'ll, '_> {
         gv
     }
 
+    /// Create a global constant.
+    ///
+    /// The returned global variable is a pointer in the default address space for globals.
+    pub(crate) fn static_addr_of_impl(
+        &self,
+        cv: &'ll Value,
+        align: Align,
+        kind: Option<&str>,
+    ) -> &'ll Value {
+        if let Some(&gv) = self.const_globals.borrow().get(&cv) {
+            unsafe {
+                // Upgrade the alignment in cases where the same constant is used with different
+                // alignment requirements
+                let llalign = align.bytes() as u32;
+                if llalign > llvm::LLVMGetAlignment(gv) {
+                    llvm::LLVMSetAlignment(gv, llalign);
+                }
+            }
+            return gv;
+        }
+        let gv = self.static_addr_of_mut(cv, align, kind);
+        unsafe {
+            llvm::LLVMSetGlobalConstant(gv, True);
+        }
+        self.const_globals.borrow_mut().insert(cv, gv);
+        gv
+    }
+
     #[instrument(level = "debug", skip(self))]
     pub(crate) fn get_static(&self, def_id: DefId) -> &'ll Value {
         let instance = Instance::mono(self.tcx, def_id);
@@ -505,24 +541,15 @@ impl<'ll> CodegenCx<'ll, '_> {
 }
 
 impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> {
+    /// Get a pointer to a global variable.
+    ///
+    /// The pointer will always be in the default address space. If global variables default to a
+    /// different address space, an addrspacecast is inserted.
     fn static_addr_of(&self, cv: &'ll Value, align: Align, kind: Option<&str>) -> &'ll Value {
-        if let Some(&gv) = self.const_globals.borrow().get(&cv) {
-            unsafe {
-                // Upgrade the alignment in cases where the same constant is used with different
-                // alignment requirements
-                let llalign = align.bytes() as u32;
-                if llalign > llvm::LLVMGetAlignment(gv) {
-                    llvm::LLVMSetAlignment(gv, llalign);
-                }
-            }
-            return gv;
-        }
-        let gv = self.static_addr_of_mut(cv, align, kind);
-        unsafe {
-            llvm::LLVMSetGlobalConstant(gv, True);
-        }
-        self.const_globals.borrow_mut().insert(cv, gv);
-        gv
+        let gv = self.static_addr_of_impl(cv, align, kind);
+        // static_addr_of_impl returns the bare global variable, which might not be in the default
+        // address space. Cast to the default address space if necessary.
+        self.const_pointercast(gv, self.type_ptr())
     }
 
     fn codegen_static(&self, def_id: DefId) {
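
The consts.rs reorganization above separates two needs: `static_addr_of` keeps its old contract of returning a pointer usable in the default address space (inserting an addrspacecast via `const_pointercast` when the target places globals elsewhere), while the new `static_addr_of_impl` hands back the bare global for callers that must manipulate the global itself. A sketch of the intended call pattern inside the backend (`init` and `align` stand in for a real constant and alignment):

    // Most code wants a pointer it can use like any other pointer:
    let ptr_in_default_as = cx.static_addr_of(init, align, None);
    // Code that needs the global variable itself -- e.g. to mark it constant,
    // upgrade its alignment, or attach vtable debuginfo -- uses the raw form:
    let raw_global = cx.static_addr_of_impl(init, align, None);
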
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 65345751842..ba4fd75fb94 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -1,11 +1,13 @@
 use std::borrow::Borrow;
 use std::cell::{Cell, RefCell};
 use std::ffi::{CStr, c_char, c_uint};
+use std::ops::Deref;
 use std::str;
 
 use rustc_abi::{HasDataLayout, TargetDataLayout, VariantIdx};
 use rustc_codegen_ssa::back::versioned_llvm_target;
 use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh};
+use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::errors as ssa_errors;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::base_n::{ALPHANUMERIC_ONLY, ToBaseN};
@@ -30,30 +32,52 @@ use smallvec::SmallVec;
 
 use crate::back::write::to_llvm_code_model;
 use crate::callee::get_fn;
-use crate::common::AsCCharPtr;
+use crate::common::{self, AsCCharPtr};
 use crate::debuginfo::metadata::apply_vcall_visibility_metadata;
 use crate::llvm::{Metadata, MetadataType};
 use crate::type_::Type;
 use crate::value::Value;
 use crate::{attributes, coverageinfo, debuginfo, llvm, llvm_util};
 
+/// `TyCtxt` (and related cache datastructures) can't be move between threads.
+/// However, there are various cx related functions which we want to be available to the builder and
+/// other compiler pieces. Here we define a small subset which has enough information and can be
+/// moved around more freely.
+pub(crate) struct SimpleCx<'ll> {
+    pub llmod: &'ll llvm::Module,
+    pub llcx: &'ll llvm::Context,
+}
+
+impl<'ll> Borrow<SimpleCx<'ll>> for CodegenCx<'ll, '_> {
+    fn borrow(&self) -> &SimpleCx<'ll> {
+        &self.scx
+    }
+}
+
+impl<'ll, 'tcx> Deref for CodegenCx<'ll, 'tcx> {
+    type Target = SimpleCx<'ll>;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.scx
+    }
+}
+
 /// There is one `CodegenCx` per codegen unit. Each one has its own LLVM
 /// `llvm::Context` so that several codegen units may be processed in parallel.
 /// All other LLVM data structures in the `CodegenCx` are tied to that `llvm::Context`.
 pub(crate) struct CodegenCx<'ll, 'tcx> {
     pub tcx: TyCtxt<'tcx>,
+    pub scx: SimpleCx<'ll>,
     pub use_dll_storage_attrs: bool,
     pub tls_model: llvm::ThreadLocalMode,
 
-    pub llmod: &'ll llvm::Module,
-    pub llcx: &'ll llvm::Context,
     pub codegen_unit: &'tcx CodegenUnit<'tcx>,
 
     /// Cache instances of monomorphic and polymorphic items
     pub instances: RefCell<FxHashMap<Instance<'tcx>, &'ll Value>>,
     /// Cache generated vtables
-    pub vtables:
-        RefCell<FxHashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), &'ll Value>>,
+    pub vtables: RefCell<FxHashMap<(Ty<'tcx>, Option<ty::ExistentialTraitRef<'tcx>>), &'ll Value>>,
     /// Cache of constant strings,
     pub const_str_cache: RefCell<FxHashMap<String, &'ll Value>>,
 
@@ -553,10 +577,9 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
 
         CodegenCx {
             tcx,
+            scx: SimpleCx { llcx, llmod },
             use_dll_storage_attrs,
             tls_model,
-            llmod,
-            llcx,
             codegen_unit,
             instances: Default::default(),
             vtables: Default::default(),
@@ -600,6 +623,11 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             llvm::set_section(g, c"llvm.metadata");
         }
     }
+}
+impl<'ll> SimpleCx<'ll> {
+    pub(crate) fn val_ty(&self, v: &'ll Value) -> &'ll Type {
+        common::val_ty(v)
+    }
 
     pub(crate) fn get_metadata_value(&self, metadata: &'ll Metadata) -> &'ll Value {
         unsafe { llvm::LLVMMetadataAsValue(self.llcx, metadata) }
@@ -625,20 +653,23 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             llvm::LLVMMDStringInContext2(self.llcx, name.as_ptr() as *const c_char, name.len())
         })
     }
+
+    pub(crate) fn type_kind(&self, ty: &'ll Type) -> TypeKind {
+        unsafe { llvm::LLVMRustGetTypeKind(ty).to_generic() }
+    }
 }
 
 impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
     fn vtables(
         &self,
-    ) -> &RefCell<FxHashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), &'ll Value>>
-    {
+    ) -> &RefCell<FxHashMap<(Ty<'tcx>, Option<ty::ExistentialTraitRef<'tcx>>), &'ll Value>> {
         &self.vtables
     }
 
     fn apply_vcall_visibility_metadata(
         &self,
         ty: Ty<'tcx>,
-        poly_trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
+        poly_trait_ref: Option<ty::ExistentialTraitRef<'tcx>>,
         vtable: &'ll Value,
     ) {
         apply_vcall_visibility_metadata(self, ty, poly_trait_ref, vtable);
@@ -751,7 +782,6 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         } else {
             // If the symbol already exists, it is an error: for example, the user wrote
             // #[no_mangle] extern "C" fn main(..) {..}
-            // instead of #[start]
             None
         }
     }
@@ -1179,6 +1209,20 @@ impl CodegenCx<'_, '_> {
     }
 }
 
+// This is a duplication of the set_metadata function above. However, so far it's the only one
+// shared between both contexts, so it doesn't seem worth it to make the Cx generic like we did it
+// for the Builder.
+impl SimpleCx<'_> {
+    #[allow(unused)]
+    /// A wrapper for [`llvm::LLVMSetMetadata`], but it takes `Metadata` as a parameter instead of `Value`.
+    pub(crate) fn set_metadata<'a>(&self, val: &'a Value, kind_id: MetadataType, md: &'a Metadata) {
+        unsafe {
+            let node = llvm::LLVMMetadataAsValue(&self.llcx, md);
+            llvm::LLVMSetMetadata(val, kind_id as c_uint, node);
+        }
+    }
+}
+
 impl HasDataLayout for CodegenCx<'_, '_> {
     #[inline]
     fn data_layout(&self) -> &TargetDataLayout {
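
Because `CodegenCx` now derefs to (and can be borrowed as) the new `SimpleCx`, helpers that only need the raw LLVM module and context can be written once and called with either context. A small illustration (the helper name is made up):

    // Usable with both `&SimpleCx<'ll>` and `&CodegenCx<'ll, 'tcx>` thanks to
    // the `Borrow<SimpleCx<'ll>>` impl added above.
    fn is_fn_ty<'ll, CX: Borrow<SimpleCx<'ll>>>(cx: &CX, ty: &'ll Type) -> bool {
        cx.borrow().type_kind(ty) == TypeKind::Function
    }
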
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index b3ad2a0e409..9a2473d6cf2 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -9,6 +9,7 @@ use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_index::IndexVec;
 use rustc_middle::mir;
+use rustc_middle::mir::mono::MonoItemPartitions;
 use rustc_middle::ty::{self, TyCtxt};
 use rustc_session::RemapFileNameExt;
 use rustc_session::config::RemapPathScopeComponents;
@@ -297,12 +298,13 @@ struct UsageSets<'tcx> {
 /// Prepare sets of definitions that are relevant to deciding whether something
 /// is an "unused function" for coverage purposes.
 fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
-    let (all_mono_items, cgus) = tcx.collect_and_partition_mono_items(());
+    let MonoItemPartitions { all_mono_items, codegen_units, .. } =
+        tcx.collect_and_partition_mono_items(());
 
     // Obtain a MIR body for each function participating in codegen, via an
     // arbitrary instance.
     let mut def_ids_seen = FxHashSet::default();
-    let def_and_mir_for_all_mono_fns = cgus
+    let def_and_mir_for_all_mono_fns = codegen_units
         .iter()
         .flat_map(|cgu| cgu.items().keys())
         .filter_map(|item| match item {
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
index 5428d776f41..460a4664615 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -51,7 +51,7 @@ pub(crate) fn prepare_covfun_record<'tcx>(
     is_used: bool,
 ) -> Option<CovfunRecord<'tcx>> {
     let fn_cov_info = tcx.instance_mir(instance.def).function_coverage_info.as_deref()?;
-    let ids_info = tcx.coverage_ids_info(instance.def);
+    let ids_info = tcx.coverage_ids_info(instance.def)?;
 
     let expressions = prepare_expressions(fn_cov_info, ids_info, is_used);
 
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
index 7311cd9d230..021108cd51c 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
@@ -8,7 +8,6 @@ use rustc_codegen_ssa::traits::{
 use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
 use rustc_middle::mir::coverage::CoverageKind;
 use rustc_middle::ty::Instance;
-use rustc_middle::ty::layout::HasTyCtxt;
 use tracing::{debug, instrument};
 
 use crate::builder::Builder;
@@ -147,6 +146,10 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
             debug!("function has a coverage statement but no coverage info");
             return;
         };
+        let Some(ids_info) = bx.tcx.coverage_ids_info(instance.def) else {
+            debug!("function has a coverage statement but no IDs info");
+            return;
+        };
 
         // Mark the instance as used in this CGU, for coverage purposes.
         // This includes functions that were not partitioned into this CGU,
@@ -162,8 +165,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
                 // be smaller than the number originally inserted by the instrumentor,
                 // if some high-numbered counters were removed by MIR optimizations.
                 // If so, LLVM's profiler runtime will use fewer physical counters.
-                let num_counters =
-                    bx.tcx().coverage_ids_info(instance.def).num_counters_after_mir_opts();
+                let num_counters = ids_info.num_counters_after_mir_opts();
                 assert!(
                     num_counters as usize <= function_coverage_info.num_counters,
                     "num_counters disagreement: query says {num_counters} but function info only has {}",
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
index e545ce386ed..11eb9651af6 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
@@ -9,7 +9,7 @@ use rustc_middle::mir::{Body, SourceScope};
 use rustc_middle::ty::layout::{FnAbiOf, HasTypingEnv};
 use rustc_middle::ty::{self, Instance};
 use rustc_session::config::DebugInfo;
-use rustc_span::{BytePos, hygiene};
+use rustc_span::{BytePos, DUMMY_SP, hygiene};
 
 use super::metadata::file_metadata;
 use super::utils::DIB;
@@ -85,23 +85,15 @@ fn make_mir_scope<'ll, 'tcx>(
             discriminators,
             parent,
         );
-        if let Some(parent_scope) = debug_context.scopes[parent] {
-            parent_scope
-        } else {
-            // If the parent scope could not be represented then no children
-            // can be either.
-            debug_context.scopes[scope] = None;
-            instantiated.insert(scope);
-            return;
-        }
+        debug_context.scopes[parent]
     } else {
         // The root is the function itself.
         let file = cx.sess().source_map().lookup_source_file(mir.span.lo());
-        debug_context.scopes[scope] = Some(DebugScope {
+        debug_context.scopes[scope] = DebugScope {
             file_start_pos: file.start_pos,
             file_end_pos: file.end_position(),
-            ..debug_context.scopes[scope].unwrap()
-        });
+            ..debug_context.scopes[scope]
+        };
         instantiated.insert(scope);
         return;
     };
@@ -112,7 +104,7 @@ fn make_mir_scope<'ll, 'tcx>(
     {
         // Do not create a DIScope if there are no variables defined in this
         // MIR `SourceScope`, and it's not `inlined`, to avoid debuginfo bloat.
-        debug_context.scopes[scope] = Some(parent_scope);
+        debug_context.scopes[scope] = parent_scope;
         instantiated.insert(scope);
         return;
     }
@@ -145,14 +137,7 @@ fn make_mir_scope<'ll, 'tcx>(
         },
     };
 
-    let mut debug_scope = Some(DebugScope {
-        dbg_scope,
-        inlined_at: parent_scope.inlined_at,
-        file_start_pos: loc.file.start_pos,
-        file_end_pos: loc.file.end_position(),
-    });
-
-    if let Some((_, callsite_span)) = scope_data.inlined {
+    let inlined_at = scope_data.inlined.map(|(_, callsite_span)| {
         let callsite_span = hygiene::walk_chain_collapsed(callsite_span, mir.span);
         let callsite_scope = parent_scope.adjust_dbg_scope_for_span(cx, callsite_span);
         let loc = cx.dbg_loc(callsite_scope, parent_scope.inlined_at, callsite_span);
@@ -175,29 +160,29 @@ fn make_mir_scope<'ll, 'tcx>(
         // Note further that we can't key this hashtable on the span itself,
         // because these spans could have distinct SyntaxContexts. We have
         // to key on exactly what we're giving to LLVM.
-        let inlined_at = match discriminators.entry(callsite_span.lo()) {
+        match discriminators.entry(callsite_span.lo()) {
             Entry::Occupied(mut o) => {
                 *o.get_mut() += 1;
+                // NB: We have to emit *something* here or we'll fail LLVM IR verification
+                // in at least some circumstances (see issue #135322) so if the required
+                // discriminant cannot be encoded fall back to the dummy location.
                 unsafe { llvm::LLVMRustDILocationCloneWithBaseDiscriminator(loc, *o.get()) }
+                    .unwrap_or_else(|| {
+                        cx.dbg_loc(callsite_scope, parent_scope.inlined_at, DUMMY_SP)
+                    })
             }
             Entry::Vacant(v) => {
                 v.insert(0);
-                Some(loc)
-            }
-        };
-        match inlined_at {
-            Some(inlined_at) => {
-                debug_scope.as_mut().unwrap().inlined_at = Some(inlined_at);
-            }
-            None => {
-                // LLVM has a maximum discriminator that it can encode (currently
-                // it uses 12 bits for 4096 possible values). If we exceed that
-                // there is little we can do but drop the debug info.
-                debug_scope = None;
+                loc
             }
         }
-    }
+    });
 
-    debug_context.scopes[scope] = debug_scope;
+    debug_context.scopes[scope] = DebugScope {
+        dbg_scope,
+        inlined_at: inlined_at.or(parent_scope.inlined_at),
+        file_start_pos: loc.file.start_pos,
+        file_end_pos: loc.file.end_position(),
+    };
     instantiated.insert(scope);
 }
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index 88e43e1c678..3a0c7f007bd 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -13,7 +13,7 @@ use rustc_hir::def_id::{DefId, LOCAL_CRATE};
 use rustc_middle::bug;
 use rustc_middle::ty::layout::{HasTypingEnv, LayoutOf, TyAndLayout};
 use rustc_middle::ty::{
-    self, AdtKind, CoroutineArgsExt, Instance, PolyExistentialTraitRef, Ty, TyCtxt, Visibility,
+    self, AdtKind, CoroutineArgsExt, ExistentialTraitRef, Instance, Ty, TyCtxt, Visibility,
 };
 use rustc_session::config::{self, DebugInfo, Lto};
 use rustc_span::{DUMMY_SP, FileName, FileNameDisplayPreference, SourceFile, Symbol, hygiene};
@@ -442,7 +442,7 @@ pub(crate) fn type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, t: Ty<'tcx>) ->
         // (or if there is no allocator argument).
         ty::Adt(def, args)
             if def.is_box()
-                && args.get(1).map_or(true, |arg| cx.layout_of(arg.expect_ty()).is_1zst()) =>
+                && args.get(1).is_none_or(|arg| cx.layout_of(arg.expect_ty()).is_1zst()) =>
         {
             build_pointer_or_reference_di_node(cx, t, t.expect_boxed_ty(), unique_type_id)
         }
@@ -919,8 +919,7 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
         .unwrap_or_default();
     let kind = DebugEmissionKind::from_generic(tcx.sess.opts.debuginfo);
 
-    let dwarf_version =
-        tcx.sess.opts.unstable_opts.dwarf_version.unwrap_or(tcx.sess.target.default_dwarf_version);
+    let dwarf_version = tcx.sess.dwarf_version();
     let is_dwarf_kind =
         matches!(tcx.sess.target.debuginfo_kind, DebuginfoKind::Dwarf | DebuginfoKind::DwarfDsym);
     // Don't emit `.debug_pubnames` and `.debug_pubtypes` on DWARFv4 or lower.
@@ -1400,7 +1399,7 @@ pub(crate) fn build_global_var_di_node<'ll>(
 fn build_vtable_type_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     ty: Ty<'tcx>,
-    poly_trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
+    poly_trait_ref: Option<ty::ExistentialTraitRef<'tcx>>,
 ) -> &'ll DIType {
     let tcx = cx.tcx;
 
@@ -1488,10 +1487,30 @@ fn build_vtable_type_di_node<'ll, 'tcx>(
     .di_node
 }
 
+/// Get the global variable for the vtable.
+///
+/// When using global variables, we may have created an addrspacecast to get a pointer to the
+/// default address space if global variables are created in a different address space.
+/// For modifying the vtable, we need the real global variable. This function accepts either a
+/// global variable (which is simply returned), or an addrspacecast constant expression.
+/// If the given value is an addrspacecast, the cast is removed and the global variable behind
+/// the cast is returned.
+fn find_vtable_behind_cast<'ll>(vtable: &'ll Value) -> &'ll Value {
+    // The vtable is a global variable, which may be behind an addrspacecast.
+    unsafe {
+        if let Some(c) = llvm::LLVMIsAConstantExpr(vtable) {
+            if llvm::LLVMGetConstOpcode(c) == llvm::Opcode::AddrSpaceCast {
+                return llvm::LLVMGetOperand(c, 0).unwrap();
+            }
+        }
+    }
+    vtable
+}
+
 pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     ty: Ty<'tcx>,
-    trait_ref: Option<PolyExistentialTraitRef<'tcx>>,
+    trait_ref: Option<ExistentialTraitRef<'tcx>>,
     vtable: &'ll Value,
 ) {
     // FIXME(flip1995): The virtual function elimination optimization only works with full LTO in
@@ -1508,9 +1527,11 @@ pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>(
 
     let Some(trait_ref) = trait_ref else { return };
 
+    // Unwrap potential addrspacecast
+    let vtable = find_vtable_behind_cast(vtable);
     let trait_ref_self = trait_ref.with_self_ty(cx.tcx, ty);
     let trait_ref_self = cx.tcx.erase_regions(trait_ref_self);
-    let trait_def_id = trait_ref_self.def_id();
+    let trait_def_id = trait_ref_self.def_id;
     let trait_vis = cx.tcx.visibility(trait_def_id);
 
     let cgus = cx.sess().codegen_units().as_usize();
@@ -1569,7 +1590,7 @@ pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>(
 pub(crate) fn create_vtable_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     ty: Ty<'tcx>,
-    poly_trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
+    poly_trait_ref: Option<ty::ExistentialTraitRef<'tcx>>,
     vtable: &'ll Value,
 ) {
     if cx.dbg_cx.is_none() {
@@ -1581,6 +1602,9 @@ pub(crate) fn create_vtable_di_node<'ll, 'tcx>(
         return;
     }
 
+    // Unwrap potential addrspacecast
+    let vtable = find_vtable_behind_cast(vtable);
+
     // When full debuginfo is enabled, we want to try and prevent vtables from being
     // merged. Otherwise debuggers will have a hard time mapping from dyn pointer
     // to concrete type.
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
index a37e719d43f..af1d503ad6a 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
@@ -6,7 +6,7 @@ use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_macros::HashStable;
 use rustc_middle::bug;
-use rustc_middle::ty::{self, PolyExistentialTraitRef, Ty, TyCtxt};
+use rustc_middle::ty::{self, ExistentialTraitRef, Ty, TyCtxt};
 
 use super::{DefinitionLocation, SmallVec, UNKNOWN_LINE_NUMBER, unknown_file_metadata};
 use crate::common::{AsCCharPtr, CodegenCx};
@@ -44,7 +44,7 @@ pub(super) enum UniqueTypeId<'tcx> {
     /// The ID for the additional wrapper struct type describing an enum variant in CPP-like mode.
     VariantStructTypeCppLikeWrapper(Ty<'tcx>, VariantIdx, private::HiddenZst),
     /// The ID of the artificial type we create for VTables.
-    VTableTy(Ty<'tcx>, Option<PolyExistentialTraitRef<'tcx>>, private::HiddenZst),
+    VTableTy(Ty<'tcx>, Option<ExistentialTraitRef<'tcx>>, private::HiddenZst),
 }
 
 impl<'tcx> UniqueTypeId<'tcx> {
@@ -88,7 +88,7 @@ impl<'tcx> UniqueTypeId<'tcx> {
     pub(crate) fn for_vtable_ty(
         tcx: TyCtxt<'tcx>,
         self_type: Ty<'tcx>,
-        implemented_trait: Option<PolyExistentialTraitRef<'tcx>>,
+        implemented_trait: Option<ExistentialTraitRef<'tcx>>,
     ) -> Self {
         assert_eq!(
             self_type,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index 755f4816acf..b1ce52667bd 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -22,6 +22,7 @@ use rustc_session::config::{self, DebugInfo};
 use rustc_span::{
     BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId, Symbol,
 };
+use rustc_target::spec::DebuginfoKind;
 use smallvec::SmallVec;
 use tracing::debug;
 
@@ -93,29 +94,31 @@ impl<'ll, 'tcx> CodegenUnitDebugContext<'ll, 'tcx> {
 
     pub(crate) fn finalize(&self, sess: &Session) {
         unsafe { llvm::LLVMRustDIBuilderFinalize(self.builder) };
-        if !sess.target.is_like_msvc {
-            // Debuginfo generation in LLVM by default uses a higher
-            // version of dwarf than macOS currently understands. We can
-            // instruct LLVM to emit an older version of dwarf, however,
-            // for macOS to understand. For more info see #11352
-            // This can be overridden using --llvm-opts -dwarf-version,N.
-            // Android has the same issue (#22398)
-            let dwarf_version =
-                sess.opts.unstable_opts.dwarf_version.unwrap_or(sess.target.default_dwarf_version);
-            llvm::add_module_flag_u32(
-                self.llmod,
-                llvm::ModuleFlagMergeBehavior::Warning,
-                "Dwarf Version",
-                dwarf_version,
-            );
-        } else {
-            // Indicate that we want CodeView debug information on MSVC
-            llvm::add_module_flag_u32(
-                self.llmod,
-                llvm::ModuleFlagMergeBehavior::Warning,
-                "CodeView",
-                1,
-            );
+
+        match sess.target.debuginfo_kind {
+            DebuginfoKind::Dwarf | DebuginfoKind::DwarfDsym => {
+                // Debuginfo generation in LLVM by default uses a higher
+                // version of dwarf than macOS currently understands. We can
+                // instruct LLVM to emit an older version of dwarf, however,
+                // for macOS to understand. For more info see #11352
+                // This can be overridden using --llvm-opts -dwarf-version,N.
+                // Android has the same issue (#22398)
+                llvm::add_module_flag_u32(
+                    self.llmod,
+                    llvm::ModuleFlagMergeBehavior::Warning,
+                    "Dwarf Version",
+                    sess.dwarf_version(),
+                );
+            }
+            DebuginfoKind::Pdb => {
+                // Indicate that we want CodeView debug information
+                llvm::add_module_flag_u32(
+                    self.llmod,
+                    llvm::ModuleFlagMergeBehavior::Warning,
+                    "CodeView",
+                    1,
+                );
+            }
         }
 
         // Prevent bitcode readers from deleting the debug info.
@@ -295,12 +298,12 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         }
 
         // Initialize fn debug context (including scopes).
-        let empty_scope = Some(DebugScope {
+        let empty_scope = DebugScope {
             dbg_scope: self.dbg_scope_fn(instance, fn_abi, Some(llfn)),
             inlined_at: None,
             file_start_pos: BytePos(0),
             file_end_pos: BytePos(0),
-        });
+        };
         let mut fn_debug_context = FunctionDebugContext {
             scopes: IndexVec::from_elem(empty_scope, &mir.source_scopes),
             inlined_function_scopes: Default::default(),
@@ -585,7 +588,7 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
     fn create_vtable_debuginfo(
         &self,
         ty: Ty<'tcx>,
-        trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
+        trait_ref: Option<ty::ExistentialTraitRef<'tcx>>,
         vtable: Self::Value,
     ) {
         metadata::create_vtable_di_node(self, ty, trait_ref, vtable)
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index c72b5b5611f..bdc83267cca 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -21,26 +21,26 @@ use tracing::debug;
 
 use crate::abi::{FnAbi, FnAbiLlvmExt};
 use crate::common::AsCCharPtr;
-use crate::context::CodegenCx;
+use crate::context::{CodegenCx, SimpleCx};
 use crate::llvm::AttributePlace::Function;
 use crate::llvm::Visibility;
 use crate::type_::Type;
 use crate::value::Value;
 use crate::{attributes, llvm};
 
-/// Declare a function.
+/// Declare a function with a SimpleCx.
 ///
 /// If there’s a value with the same name already declared, the function will
 /// update the declaration and return existing Value instead.
-pub(crate) fn declare_raw_fn<'ll>(
-    cx: &CodegenCx<'ll, '_>,
+pub(crate) fn declare_simple_fn<'ll>(
+    cx: &SimpleCx<'ll>,
     name: &str,
     callconv: llvm::CallConv,
     unnamed: llvm::UnnamedAddr,
     visibility: llvm::Visibility,
     ty: &'ll Type,
 ) -> &'ll Value {
-    debug!("declare_raw_fn(name={:?}, ty={:?})", name, ty);
+    debug!("declare_simple_fn(name={:?}, ty={:?})", name, ty);
     let llfn = unsafe {
         llvm::LLVMRustGetOrInsertFunction(cx.llmod, name.as_c_char_ptr(), name.len(), ty)
     };
@@ -49,6 +49,24 @@ pub(crate) fn declare_raw_fn<'ll>(
     llvm::SetUnnamedAddress(llfn, unnamed);
     llvm::set_visibility(llfn, visibility);
 
+    llfn
+}
+
+/// Declare a function.
+///
+/// If there’s a value with the same name already declared, the function will
+/// update the declaration and return existing Value instead.
+pub(crate) fn declare_raw_fn<'ll, 'tcx>(
+    cx: &CodegenCx<'ll, 'tcx>,
+    name: &str,
+    callconv: llvm::CallConv,
+    unnamed: llvm::UnnamedAddr,
+    visibility: llvm::Visibility,
+    ty: &'ll Type,
+) -> &'ll Value {
+    debug!("declare_raw_fn(name={:?}, ty={:?})", name, ty);
+    let llfn = declare_simple_fn(cx, name, callconv, unnamed, visibility, ty);
+
     let mut attrs = SmallVec::<[_; 4]>::new();
 
     if cx.tcx.sess.opts.cg.no_redzone.unwrap_or(cx.tcx.sess.target.disable_redzone) {
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index cabcfc9b42b..43d6ccfcb4a 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1081,11 +1081,11 @@ fn codegen_emcc_try<'ll>(
 
 // Helper function to give a Block to a closure to codegen a shim function.
 // This is currently primarily used for the `try` intrinsic functions above.
-fn gen_fn<'ll, 'tcx>(
-    cx: &CodegenCx<'ll, 'tcx>,
+fn gen_fn<'a, 'll, 'tcx>(
+    cx: &'a CodegenCx<'ll, 'tcx>,
     name: &str,
     rust_fn_sig: ty::PolyFnSig<'tcx>,
-    codegen: &mut dyn FnMut(Builder<'_, 'll, 'tcx>),
+    codegen: &mut dyn FnMut(Builder<'a, 'll, 'tcx>),
 ) -> (&'ll Type, &'ll Value) {
     let fn_abi = cx.fn_abi_of_fn_ptr(rust_fn_sig, ty::List::empty());
     let llty = fn_abi.llvm_type(cx);
@@ -1104,9 +1104,9 @@ fn gen_fn<'ll, 'tcx>(
 // catch exceptions.
 //
 // This function is only generated once and is then cached.
-fn get_rust_try_fn<'ll, 'tcx>(
-    cx: &CodegenCx<'ll, 'tcx>,
-    codegen: &mut dyn FnMut(Builder<'_, 'll, 'tcx>),
+fn get_rust_try_fn<'a, 'll, 'tcx>(
+    cx: &'a CodegenCx<'ll, 'tcx>,
+    codegen: &mut dyn FnMut(Builder<'a, 'll, 'tcx>),
 ) -> (&'ll Type, &'ll Value) {
     if let Some(llfn) = cx.rust_try_fn.get() {
         return llfn;
@@ -1182,6 +1182,60 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         }};
     }
 
+    /// Returns the bitwidth of the `$ty` argument if it is an `Int` type.
+    macro_rules! require_int_ty {
+        ($ty: expr, $diag: expr) => {
+            match $ty {
+                ty::Int(i) => i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
+                _ => {
+                    return_error!($diag);
+                }
+            }
+        };
+    }
+
+    /// Returns the bitwidth of the `$ty` argument if it is an `Int` or `Uint` type.
+    macro_rules! require_int_or_uint_ty {
+        ($ty: expr, $diag: expr) => {
+            match $ty {
+                ty::Int(i) => i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
+                ty::Uint(i) => {
+                    i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits())
+                }
+                _ => {
+                    return_error!($diag);
+                }
+            }
+        };
+    }
+
+    /// Converts a vector mask, where each element has a bit width equal to the data elements it is used with,
+    /// down to an i1 based mask that can be used by llvm intrinsics.
+    ///
+    /// The rust simd semantics are that each element should either consist of all ones or all zeroes,
+    /// but this information is not available to llvm. Truncating the vector effectively uses the lowest bit,
+    /// but codegen for several targets is better if we consider the highest bit by shifting.
+    ///
+    /// For x86 SSE/AVX targets this is beneficial since most instructions with mask parameters only consider the highest bit.
+    /// So even though on llvm level we have an additional shift, in the final assembly there is no shift or truncate and
+    /// instead the mask can be used as is.
+    ///
+    /// For aarch64 and other targets there is a benefit because a mask from the sign bit can be more
+    /// efficiently converted to an all ones / all zeroes mask by comparing whether each element is negative.
+    fn vector_mask_to_bitmask<'a, 'll, 'tcx>(
+        bx: &mut Builder<'a, 'll, 'tcx>,
+        i_xn: &'ll Value,
+        in_elem_bitwidth: u64,
+        in_len: u64,
+    ) -> &'ll Value {
+        // Shift the MSB to the right by "in_elem_bitwidth - 1" into the first bit position.
+        let shift_idx = bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
+        let shift_indices = vec![shift_idx; in_len as _];
+        let i_xn_msb = bx.lshr(i_xn, bx.const_vector(shift_indices.as_slice()));
+        // Truncate vector to an <i1 x N>
+        bx.trunc(i_xn_msb, bx.type_vector(bx.type_i1(), in_len))
+    }
+
     let tcx = bx.tcx();
     let sig = tcx.normalize_erasing_late_bound_regions(bx.typing_env(), callee_ty.fn_sig(tcx));
     let arg_tys = sig.inputs();
@@ -1275,7 +1329,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     }
 
     if name == sym::simd_shuffle_generic {
-        let idx = fn_args[2].expect_const().try_to_valtree().unwrap().0.unwrap_branch();
+        let idx = fn_args[2].expect_const().to_value().valtree.unwrap_branch();
         let n = idx.len() as u64;
 
         let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
@@ -1433,14 +1487,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             m_len,
             v_len
         });
-        match m_elem_ty.kind() {
-            ty::Int(_) => {}
-            _ => return_error!(InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }),
-        }
-        // truncate the mask to a vector of i1s
-        let i1 = bx.type_i1();
-        let i1xn = bx.type_vector(i1, m_len as u64);
-        let m_i1s = bx.trunc(args[0].immediate(), i1xn);
+        let in_elem_bitwidth =
+            require_int_ty!(m_elem_ty.kind(), InvalidMonomorphization::MaskType {
+                span,
+                name,
+                ty: m_elem_ty
+            });
+        let m_i1s = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, m_len);
         return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
     }
 
@@ -1457,33 +1510,15 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let expected_bytes = in_len.div_ceil(8);
 
         // Integer vector <i{in_bitwidth} x in_len>:
-        let (i_xn, in_elem_bitwidth) = match in_elem.kind() {
-            ty::Int(i) => (
-                args[0].immediate(),
-                i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
-            ),
-            ty::Uint(i) => (
-                args[0].immediate(),
-                i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
-            ),
-            _ => return_error!(InvalidMonomorphization::VectorArgument {
+        let in_elem_bitwidth =
+            require_int_or_uint_ty!(in_elem.kind(), InvalidMonomorphization::VectorArgument {
                 span,
                 name,
                 in_ty,
                 in_elem
-            }),
-        };
+            });
 
-        // LLVM doesn't always know the inputs are `0` or `!0`, so we shift here so it optimizes to
-        // `pmovmskb` and similar on x86.
-        let shift_indices =
-            vec![
-                bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
-                in_len as _
-            ];
-        let i_xn_msb = bx.lshr(i_xn, bx.const_vector(shift_indices.as_slice()));
-        // Truncate vector to an <i1 x N>
-        let i1xn = bx.trunc(i_xn_msb, bx.type_vector(bx.type_i1(), in_len));
+        let i1xn = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, in_len);
         // Bitcast <i1 x N> to iN:
         let i_ = bx.bitcast(i1xn, bx.type_ix(in_len));
 
@@ -1704,28 +1739,21 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        match element_ty2.kind() {
-            ty::Int(_) => (),
-            _ => {
-                return_error!(InvalidMonomorphization::ThirdArgElementType {
-                    span,
-                    name,
-                    expected_element: element_ty2,
-                    third_arg: arg_tys[2]
-                });
-            }
-        }
+        let mask_elem_bitwidth =
+            require_int_ty!(element_ty2.kind(), InvalidMonomorphization::ThirdArgElementType {
+                span,
+                name,
+                expected_element: element_ty2,
+                third_arg: arg_tys[2]
+            });
 
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
 
         // Truncate the mask vector to a vector of i1s:
-        let (mask, mask_ty) = {
-            let i1 = bx.type_i1();
-            let i1xn = bx.type_vector(i1, in_len);
-            (bx.trunc(args[2].immediate(), i1xn), i1xn)
-        };
+        let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
+        let mask_ty = bx.type_vector(bx.type_i1(), in_len);
 
         // Type of the vector of pointers:
         let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
@@ -1810,27 +1838,21 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        require!(
-            matches!(mask_elem.kind(), ty::Int(_)),
-            InvalidMonomorphization::ThirdArgElementType {
+        let m_elem_bitwidth =
+            require_int_ty!(mask_elem.kind(), InvalidMonomorphization::ThirdArgElementType {
                 span,
                 name,
                 expected_element: values_elem,
                 third_arg: mask_ty,
-            }
-        );
+            });
+
+        let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
+        let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
 
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
 
-        // Truncate the mask vector to a vector of i1s:
-        let (mask, mask_ty) = {
-            let i1 = bx.type_i1();
-            let i1xn = bx.type_vector(i1, mask_len);
-            (bx.trunc(args[0].immediate(), i1xn), i1xn)
-        };
-
         let llvm_pointer = bx.type_ptr();
 
         // Type of the vector of elements:
@@ -1901,27 +1923,21 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        require!(
-            matches!(mask_elem.kind(), ty::Int(_)),
-            InvalidMonomorphization::ThirdArgElementType {
+        let m_elem_bitwidth =
+            require_int_ty!(mask_elem.kind(), InvalidMonomorphization::ThirdArgElementType {
                 span,
                 name,
                 expected_element: values_elem,
                 third_arg: mask_ty,
-            }
-        );
+            });
+
+        let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
+        let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
 
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
 
-        // Truncate the mask vector to a vector of i1s:
-        let (mask, mask_ty) = {
-            let i1 = bx.type_i1();
-            let i1xn = bx.type_vector(i1, in_len);
-            (bx.trunc(args[0].immediate(), i1xn), i1xn)
-        };
-
         let ret_t = bx.type_void();
 
         let llvm_pointer = bx.type_ptr();
@@ -1995,28 +2011,21 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         // The element type of the third argument must be a signed integer type of any width:
-        match element_ty2.kind() {
-            ty::Int(_) => (),
-            _ => {
-                return_error!(InvalidMonomorphization::ThirdArgElementType {
-                    span,
-                    name,
-                    expected_element: element_ty2,
-                    third_arg: arg_tys[2]
-                });
-            }
-        }
+        let mask_elem_bitwidth =
+            require_int_ty!(element_ty2.kind(), InvalidMonomorphization::ThirdArgElementType {
+                span,
+                name,
+                expected_element: element_ty2,
+                third_arg: arg_tys[2]
+            });
 
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
 
         // Truncate the mask vector to a vector of i1s:
-        let (mask, mask_ty) = {
-            let i1 = bx.type_i1();
-            let i1xn = bx.type_vector(i1, in_len);
-            (bx.trunc(args[2].immediate(), i1xn), i1xn)
-        };
+        let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
+        let mask_ty = bx.type_vector(bx.type_i1(), in_len);
 
         let ret_t = bx.type_void();
 
@@ -2164,8 +2173,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                     });
                     args[0].immediate()
                 } else {
-                    match in_elem.kind() {
-                        ty::Int(_) | ty::Uint(_) => {}
+                    let bitwidth = match in_elem.kind() {
+                        ty::Int(i) => {
+                            i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits())
+                        }
+                        ty::Uint(i) => {
+                            i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits())
+                        }
                         _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                             span,
                             name,
@@ -2174,12 +2188,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                             in_elem,
                             ret_ty
                         }),
-                    }
+                    };
 
-                    // boolean reductions operate on vectors of i1s:
-                    let i1 = bx.type_i1();
-                    let i1xn = bx.type_vector(i1, in_len as u64);
-                    bx.trunc(args[0].immediate(), i1xn)
+                    vector_mask_to_bitmask(bx, args[0].immediate(), bitwidth, in_len as _)
                 };
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
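The `vector_mask_to_bitmask` helper added in this file relies on the invariant that Rust SIMD mask lanes are all ones or all zeroes, so selecting the sign bit and selecting the lowest bit give the same `i1`. A minimal standalone sketch of that lowering on a single scalar lane (plain Rust, no LLVM types; the function names are illustrative only, not part of this patch):

    // Illustrative only: models one lane of an i32 mask vector.
    // For a well-formed mask lane (0 or -1), shifting the MSB down by
    // `bitwidth - 1` and truncating to one bit agrees with truncating
    // to the lowest bit, which is why the shift-based form can feed
    // x86 instructions that only inspect the sign bit.
    fn lane_to_i1_via_msb(lane: i32) -> bool {
        const BITS: u32 = i32::BITS;
        // lshr by (bitwidth - 1) moves the MSB into bit 0 ...
        let msb = (lane as u32) >> (BITS - 1);
        // ... and the trunc-to-i1 keeps only that bit.
        (msb & 1) == 1
    }

    fn lane_to_i1_via_trunc(lane: i32) -> bool {
        (lane & 1) == 1
    }

    fn main() {
        for lane in [0i32, -1] {
            assert_eq!(lane_to_i1_via_msb(lane), lane_to_i1_via_trunc(lane));
        }
        println!("msb-based and trunc-based lowering agree on valid mask lanes");
    }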
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 06afe8bb3ad..4a84fd29e44 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -237,7 +237,6 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     /// Generate autodiff rules
     fn autodiff(
         cgcx: &CodegenContext<Self>,
-        tcx: TyCtxt<'_>,
         module: &ModuleCodegen<Self::Module>,
         diff_fncs: Vec<AutoDiffItem>,
         config: &ModuleConfig,
@@ -246,7 +245,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
             let dcx = cgcx.create_dcx();
             return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutLTO));
         }
-        builder::autodiff::differentiate(module, cgcx, tcx, diff_fncs, config)
+        builder::autodiff::differentiate(module, cgcx, diff_fncs, config)
     }
 }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index 729d6f62e24..ae813fe5ebf 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -7,11 +7,13 @@ use crate::llvm::Bool;
 extern "C" {
     // Enzyme
     pub fn LLVMRustHasMetadata(I: &Value, KindID: c_uint) -> bool;
-    pub fn LLVMRustEraseInstBefore(BB: &BasicBlock, I: &Value);
+    pub fn LLVMRustEraseInstUntilInclusive(BB: &BasicBlock, I: &Value);
     pub fn LLVMRustGetLastInstruction<'a>(BB: &BasicBlock) -> Option<&'a Value>;
     pub fn LLVMRustDIGetInstMetadata(I: &Value) -> Option<&Metadata>;
     pub fn LLVMRustEraseInstFromParent(V: &Value);
     pub fn LLVMRustGetTerminator<'a>(B: &BasicBlock) -> &'a Value;
+    pub fn LLVMDumpModule(M: &Module);
+    pub fn LLVMDumpValue(V: &Value);
     pub fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
 
     pub fn LLVMGetFunctionCallConv(F: &Value) -> c_uint;
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index ec6c84f6f25..cc7c5231aca 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -661,6 +661,79 @@ pub enum MemoryEffects {
     InaccessibleMemOnly,
 }
 
+/// LLVMOpcode
+#[derive(Copy, Clone, PartialEq, Eq)]
+#[repr(C)]
+pub enum Opcode {
+    Ret = 1,
+    Br = 2,
+    Switch = 3,
+    IndirectBr = 4,
+    Invoke = 5,
+    Unreachable = 7,
+    CallBr = 67,
+    FNeg = 66,
+    Add = 8,
+    FAdd = 9,
+    Sub = 10,
+    FSub = 11,
+    Mul = 12,
+    FMul = 13,
+    UDiv = 14,
+    SDiv = 15,
+    FDiv = 16,
+    URem = 17,
+    SRem = 18,
+    FRem = 19,
+    Shl = 20,
+    LShr = 21,
+    AShr = 22,
+    And = 23,
+    Or = 24,
+    Xor = 25,
+    Alloca = 26,
+    Load = 27,
+    Store = 28,
+    GetElementPtr = 29,
+    Trunc = 30,
+    ZExt = 31,
+    SExt = 32,
+    FPToUI = 33,
+    FPToSI = 34,
+    UIToFP = 35,
+    SIToFP = 36,
+    FPTrunc = 37,
+    FPExt = 38,
+    PtrToInt = 39,
+    IntToPtr = 40,
+    BitCast = 41,
+    AddrSpaceCast = 60,
+    ICmp = 42,
+    FCmp = 43,
+    PHI = 44,
+    Call = 45,
+    Select = 46,
+    UserOp1 = 47,
+    UserOp2 = 48,
+    VAArg = 49,
+    ExtractElement = 50,
+    InsertElement = 51,
+    ShuffleVector = 52,
+    ExtractValue = 53,
+    InsertValue = 54,
+    Freeze = 68,
+    Fence = 55,
+    AtomicCmpXchg = 56,
+    AtomicRMW = 57,
+    Resume = 58,
+    LandingPad = 59,
+    CleanupRet = 61,
+    CatchRet = 62,
+    CatchPad = 63,
+    CleanupPad = 64,
+    CatchSwitch = 65,
+}
+
 unsafe extern "C" {
     type Opaque;
 }
@@ -741,8 +814,11 @@ pub mod debuginfo {
     pub type DIEnumerator = DIDescriptor;
     pub type DITemplateTypeParameter = DIDescriptor;
 
-    // These values **must** match with LLVMRustDIFlags!!
     bitflags! {
+        /// Must match the layout of `LLVMDIFlags` in the LLVM-C API.
+        ///
+        /// Each value declared here must also be covered by the static
+        /// assertions in `RustWrapper.cpp` used by `fromRust(LLVMDIFlags)`.
         #[repr(transparent)]
         #[derive(Clone, Copy, Default)]
         pub struct DIFlags: u32 {
@@ -752,7 +828,7 @@ pub mod debuginfo {
             const FlagPublic              = 3;
             const FlagFwdDecl             = (1 << 2);
             const FlagAppleBlock          = (1 << 3);
-            const FlagBlockByrefStruct    = (1 << 4);
+            const FlagReservedBit4        = (1 << 4);
             const FlagVirtual             = (1 << 5);
             const FlagArtificial          = (1 << 6);
             const FlagExplicit            = (1 << 7);
@@ -763,10 +839,21 @@ pub mod debuginfo {
             const FlagStaticMember        = (1 << 12);
             const FlagLValueReference     = (1 << 13);
             const FlagRValueReference     = (1 << 14);
-            const FlagExternalTypeRef     = (1 << 15);
+            const FlagReserved            = (1 << 15);
+            const FlagSingleInheritance   = (1 << 16);
+            const FlagMultipleInheritance = (2 << 16);
+            const FlagVirtualInheritance  = (3 << 16);
             const FlagIntroducedVirtual   = (1 << 18);
             const FlagBitField            = (1 << 19);
             const FlagNoReturn            = (1 << 20);
+            // The bit at (1 << 21) is unused, but was `LLVMDIFlagMainSubprogram`.
+            const FlagTypePassByValue     = (1 << 22);
+            const FlagTypePassByReference = (1 << 23);
+            const FlagEnumClass           = (1 << 24);
+            const FlagThunk               = (1 << 25);
+            const FlagNonTrivial          = (1 << 26);
+            const FlagBigEndian           = (1 << 27);
+            const FlagLittleEndian        = (1 << 28);
         }
     }
 
@@ -918,6 +1005,7 @@ unsafe extern "C" {
     pub fn LLVMMetadataTypeInContext(C: &Context) -> &Type;
 
     // Operations on all values
+    pub fn LLVMIsUndef(Val: &Value) -> Bool;
     pub fn LLVMTypeOf(Val: &Value) -> &Type;
     pub fn LLVMGetValueName2(Val: &Value, Length: *mut size_t) -> *const c_char;
     pub fn LLVMSetValueName2(Val: &Value, Name: *const c_char, NameLen: size_t);
@@ -976,7 +1064,10 @@ unsafe extern "C" {
     pub fn LLVMConstPtrToInt<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
     pub fn LLVMConstIntToPtr<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
     pub fn LLVMConstBitCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
+    pub fn LLVMConstPointerCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
     pub fn LLVMGetAggregateElement(ConstantVal: &Value, Idx: c_uint) -> Option<&Value>;
+    pub fn LLVMGetConstOpcode(ConstantVal: &Value) -> Opcode;
+    pub fn LLVMIsAConstantExpr(Val: &Value) -> Option<&Value>;
 
     // Operations on global variables, functions, and aliases (globals)
     pub fn LLVMIsDeclaration(Global: &Value) -> Bool;
@@ -1033,6 +1124,7 @@ unsafe extern "C" {
     // Operations on instructions
     pub fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>;
     pub fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock;
+    pub fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>;
 
     // Operations on call sites
     pub fn LLVMSetInstructionCallConv(Instr: &Value, CC: c_uint);
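The three inheritance flags added to `DIFlags` above are not independent bits: their values (1, 2 and 3 shifted left by 16) form a two-bit field at bits 16-17, matching LLVM's `DIFlags` encoding. A minimal sketch of decoding such a field, outside the FFI; the constants and function below are illustrative only, not part of the bindings:

    // Illustrative constants matching the shifted values declared above.
    const FLAG_SINGLE_INHERITANCE: u32 = 1 << 16;
    const FLAG_MULTIPLE_INHERITANCE: u32 = 2 << 16;
    const FLAG_VIRTUAL_INHERITANCE: u32 = 3 << 16;
    // The field occupies bits 16-17, so this mask selects the whole field.
    const INHERITANCE_MASK: u32 = 3 << 16;

    fn describe_inheritance(flags: u32) -> &'static str {
        match flags & INHERITANCE_MASK {
            FLAG_SINGLE_INHERITANCE => "single",
            FLAG_MULTIPLE_INHERITANCE => "multiple",
            FLAG_VIRTUAL_INHERITANCE => "virtual",
            _ => "none",
        }
    }

    fn main() {
        // FlagArtificial (1 << 6) combined with the virtual-inheritance encoding.
        let flags = (1 << 6) | FLAG_VIRTUAL_INHERITANCE;
        assert_eq!(describe_inheritance(flags), "virtual");
    }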
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index c3d7c217861..53611c746a7 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -319,7 +319,6 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
         sess.target
             .rust_target_features()
             .iter()
-            .filter(|(_, gate, _)| gate.in_cfg())
             .filter(|(feature, _, _)| {
                 // skip checking special features, as LLVM may not understand them
                 if RUSTC_SPECIAL_FEATURES.contains(feature) {
@@ -388,9 +387,13 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
     sess.target
         .rust_target_features()
         .iter()
-        .filter(|(_, gate, _)| gate.in_cfg())
         .filter_map(|(feature, gate, _)| {
-            if sess.is_nightly_build() || allow_unstable || gate.requires_nightly().is_none() {
+            // The `allow_unstable` set is used by rustc internally to determine which target
+            // features are truly available, so we want to return even perma-unstable "forbidden"
+            // features.
+            if allow_unstable
+                || (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
+            {
                 Some(*feature)
             } else {
                 None
@@ -670,12 +673,6 @@ pub(crate) fn global_llvm_features(
         // Will only be filled when `diagnostics` is set!
         let mut featsmap = FxHashMap::default();
 
-        // Ensure that all ABI-required features are enabled, and the ABI-forbidden ones
-        // are disabled.
-        let abi_feature_constraints = sess.target.abi_required_features();
-        let abi_incompatible_set =
-            FxHashSet::from_iter(abi_feature_constraints.incompatible.iter().copied());
-
         // Compute implied features
         let mut all_rust_features = vec![];
         for feature in sess.opts.cg.target_feature.split(',') {
@@ -746,52 +743,11 @@ pub(crate) fn global_llvm_features(
                     }
                 }
 
-                // Ensure that the features we enable/disable are compatible with the ABI.
-                if enable {
-                    if abi_incompatible_set.contains(feature) {
-                        sess.dcx().emit_warn(ForbiddenCTargetFeature {
-                            feature,
-                            enabled: "enabled",
-                            reason: "this feature is incompatible with the target ABI",
-                        });
-                    }
-                } else {
-                    // FIXME: we have to request implied features here since
-                    // negative features do not handle implied features above.
-                    for &required in abi_feature_constraints.required.iter() {
-                        let implied =
-                            sess.target.implied_target_features(std::iter::once(required));
-                        if implied.contains(feature) {
-                            sess.dcx().emit_warn(ForbiddenCTargetFeature {
-                                feature,
-                                enabled: "disabled",
-                                reason: "this feature is required by the target ABI",
-                            });
-                        }
-                    }
-                }
-
                 // FIXME(nagisa): figure out how to not allocate a full hashset here.
                 featsmap.insert(feature, enable);
             }
         }
 
-        // To be sure the ABI-relevant features are all in the right state, we explicitly
-        // (un)set them here. This means if the target spec sets those features wrong,
-        // we will silently correct them rather than silently producing wrong code.
-        // (The target sanity check tries to catch this, but we can't know which features are
-        // enabled in LLVM by default so we can't be fully sure about that check.)
-        // We add these at the beginning of the list so that `-Ctarget-features` can
-        // still override it... that's unsound, but more compatible with past behavior.
-        all_rust_features.splice(
-            0..0,
-            abi_feature_constraints
-                .required
-                .iter()
-                .map(|&f| (true, f))
-                .chain(abi_feature_constraints.incompatible.iter().map(|&f| (false, f))),
-        );
-
         // Translate this into LLVM features.
         let feats = all_rust_features
             .iter()
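The rewritten filter in `target_features_cfg` reduces to a single predicate: report the feature whenever the caller asked for the unstable set, and otherwise only when it is `cfg`-visible and its stability gate is satisfied. A standalone sketch of that predicate with hypothetical boolean parameters (not the actual rustc API):

    /// Hypothetical distillation of the filter_map condition above.
    fn feature_visible(
        allow_unstable: bool,   // caller wants even perma-unstable "forbidden" features
        in_cfg: bool,           // gate.in_cfg()
        nightly_build: bool,    // sess.is_nightly_build()
        requires_nightly: bool, // gate.requires_nightly().is_some()
    ) -> bool {
        allow_unstable || (in_cfg && (nightly_build || !requires_nightly))
    }

    fn main() {
        // A stable, cfg-visible feature is reported on any channel.
        assert!(feature_visible(false, true, false, false));
        // A nightly-only feature is hidden on stable unless the unstable set was requested.
        assert!(!feature_visible(false, true, false, true));
        assert!(feature_visible(true, false, false, true));
    }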
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index 6aec078e0de..c56ad886120 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -11,7 +11,7 @@ use rustc_middle::ty::{self, Ty};
 use rustc_target::callconv::{CastTarget, FnAbi, Reg};
 
 use crate::abi::{FnAbiLlvmExt, LlvmType};
-use crate::context::CodegenCx;
+use crate::context::{CodegenCx, SimpleCx};
 pub(crate) use crate::llvm::Type;
 use crate::llvm::{Bool, False, Metadata, True};
 use crate::type_of::LayoutLlvmExt;
@@ -35,7 +35,8 @@ impl fmt::Debug for Type {
     }
 }
 
-impl<'ll> CodegenCx<'ll, '_> {
+impl<'ll> CodegenCx<'ll, '_> {}
+impl<'ll> SimpleCx<'ll> {
     pub(crate) fn type_named_struct(&self, name: &str) -> &'ll Type {
         let name = SmallCStr::new(name);
         unsafe { llvm::LLVMStructCreateNamed(self.llcx, name.as_ptr()) }
@@ -44,11 +45,9 @@ impl<'ll> CodegenCx<'ll, '_> {
     pub(crate) fn set_struct_body(&self, ty: &'ll Type, els: &[&'ll Type], packed: bool) {
         unsafe { llvm::LLVMStructSetBody(ty, els.as_ptr(), els.len() as c_uint, packed as Bool) }
     }
-
     pub(crate) fn type_void(&self) -> &'ll Type {
         unsafe { llvm::LLVMVoidTypeInContext(self.llcx) }
     }
-
     pub(crate) fn type_token(&self) -> &'ll Type {
         unsafe { llvm::LLVMTokenTypeInContext(self.llcx) }
     }
@@ -75,7 +74,8 @@ impl<'ll> CodegenCx<'ll, '_> {
             args
         }
     }
-
+}
+impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     pub(crate) fn type_bool(&self) -> &'ll Type {
         self.type_i8()
     }
@@ -120,7 +120,8 @@ impl<'ll> CodegenCx<'ll, '_> {
         assert_eq!(size % unit_size, 0);
         self.type_array(self.type_from_integer(unit), size / unit_size)
     }
-
+}
+impl<'ll> SimpleCx<'ll> {
     pub(crate) fn type_variadic_func(&self, args: &[&'ll Type], ret: &'ll Type) -> &'ll Type {
         unsafe { llvm::LLVMFunctionType(ret, args.as_ptr(), args.len() as c_uint, True) }
     }