Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
 compiler/rustc_codegen_llvm/src/abi.rs                        |  10
 compiler/rustc_codegen_llvm/src/back/lto.rs                   |   2
 compiler/rustc_codegen_llvm/src/back/write.rs                 |   4
 compiler/rustc_codegen_llvm/src/base.rs                       |  24
 compiler/rustc_codegen_llvm/src/builder.rs                    |  24
 compiler/rustc_codegen_llvm/src/consts.rs                     |  26
 compiler/rustc_codegen_llvm/src/context.rs                    |  22
 compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs        |   9
 compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs |   6
 compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs           |   2
 compiler/rustc_codegen_llvm/src/lib.rs                        |  12
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs                   |   8
 compiler/rustc_codegen_llvm/src/mono_item.rs                  |   4
 compiler/rustc_codegen_llvm/src/va_arg.rs                     | 328
 14 files changed, 396 insertions(+), 85 deletions(-)
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 8294e29d07d..c87e70864e5 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -172,7 +172,6 @@ impl LlvmType for CastTarget {
 }
 
 trait ArgAbiExt<'ll, 'tcx> {
-    fn memory_ty(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type;
     fn store(
         &self,
         bx: &mut Builder<'_, 'll, 'tcx>,
@@ -188,12 +187,6 @@ trait ArgAbiExt<'ll, 'tcx> {
 }
 
 impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
-    /// Gets the LLVM type for a place of the original Rust type of
-    /// this argument/return, i.e., the result of `type_of::type_of`.
-    fn memory_ty(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type {
-        self.layout.llvm_type(cx)
-    }
-
     /// Stores a direct/indirect value described by this ArgAbi into a
     /// place for the original Rust type of this argument/return.
     /// Can be used for both storing formal arguments into Rust variables
@@ -302,9 +295,6 @@ impl<'ll, 'tcx> ArgAbiBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
     ) {
         arg_abi.store(self, val, dst)
     }
-    fn arg_memory_ty(&self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>) -> &'ll Type {
-        arg_abi.memory_ty(self)
-    }
 }
 
 pub(crate) trait FnAbiLlvmExt<'ll, 'tcx> {
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index cb329323f5d..ee46b49a094 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -799,7 +799,7 @@ impl Drop for ThinBuffer {
     }
 }
 
-pub(crate) unsafe fn optimize_thin_module(
+pub(crate) fn optimize_thin_module(
     thin_module: ThinModule<LlvmCodegenBackend>,
     cgcx: &CodegenContext<LlvmCodegenBackend>,
 ) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 20721c74608..bde6a9cf4bc 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -704,7 +704,7 @@ pub(crate) unsafe fn llvm_optimize(
 }
 
 // Unsafe due to LLVM calls.
-pub(crate) unsafe fn optimize(
+pub(crate) fn optimize(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
     module: &mut ModuleCodegen<ModuleLlvm>,
@@ -815,7 +815,7 @@ pub(crate) fn link(
     Ok(modules.remove(0))
 }
 
-pub(crate) unsafe fn codegen(
+pub(crate) fn codegen(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
     module: ModuleCodegen<ModuleLlvm>,
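
The `unsafe` dropped from `optimize` and `codegen` above (and from `optimize_thin_module` and the callers in lib.rs further down) follows the usual pattern of keeping unsafety inside the function body rather than on its signature. A minimal illustrative sketch of that pattern, with stand-in names only (the real bodies wrap LLVM FFI calls):

// Stand-in for an LLVM FFI call; the real functions justify why each call is sound.
unsafe fn llvm_ffi_call(x: u32) -> u32 {
    x.wrapping_add(1)
}

// Safe signature: callers no longer need an `unsafe { .. }` block of their own.
pub fn optimize_like(x: u32) -> u32 {
    // SAFETY: illustrative only; the compiler code documents each LLVM call individually.
    unsafe { llvm_ffi_call(x) }
}
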
diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs
index e4fac35aa44..5dda836988c 100644
--- a/compiler/rustc_codegen_llvm/src/base.rs
+++ b/compiler/rustc_codegen_llvm/src/base.rs
@@ -86,17 +86,24 @@ pub(crate) fn compile_codegen_unit(
             let mut cx = CodegenCx::new(tcx, cgu, &llvm_module);
             let mono_items = cx.codegen_unit.items_in_deterministic_order(cx.tcx);
             for &(mono_item, data) in &mono_items {
-                mono_item.predefine::<Builder<'_, '_, '_>>(&cx, data.linkage, data.visibility);
+                mono_item.predefine::<Builder<'_, '_, '_>>(
+                    &mut cx,
+                    cgu_name.as_str(),
+                    data.linkage,
+                    data.visibility,
+                );
             }
 
             // ... and now that we have everything pre-defined, fill out those definitions.
             for &(mono_item, item_data) in &mono_items {
-                mono_item.define::<Builder<'_, '_, '_>>(&mut cx, item_data);
+                mono_item.define::<Builder<'_, '_, '_>>(&mut cx, cgu_name.as_str(), item_data);
             }
 
             // If this codegen unit contains the main function, also create the
             // wrapper here
-            if let Some(entry) = maybe_create_entry_wrapper::<Builder<'_, '_, '_>>(&cx) {
+            if let Some(entry) =
+                maybe_create_entry_wrapper::<Builder<'_, '_, '_>>(&cx, cx.codegen_unit)
+            {
                 let attrs = attributes::sanitize_attrs(&cx, SanitizerSet::empty());
                 attributes::apply_to_llfn(entry, llvm::AttributePlace::Function, &attrs);
             }
@@ -108,14 +115,11 @@ pub(crate) fn compile_codegen_unit(
             }
 
             // Create the llvm.used and llvm.compiler.used variables.
-            if !cx.used_statics.borrow().is_empty() {
-                cx.create_used_variable_impl(c"llvm.used", &*cx.used_statics.borrow());
+            if !cx.used_statics.is_empty() {
+                cx.create_used_variable_impl(c"llvm.used", &cx.used_statics);
             }
-            if !cx.compiler_used_statics.borrow().is_empty() {
-                cx.create_used_variable_impl(
-                    c"llvm.compiler.used",
-                    &*cx.compiler_used_statics.borrow(),
-                );
+            if !cx.compiler_used_statics.is_empty() {
+                cx.create_used_variable_impl(c"llvm.compiler.used", &cx.compiler_used_statics);
             }
 
             // Run replace-all-uses-with for statics that need it. This must
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 5238755c8eb..167678c2ff1 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -612,7 +612,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         &mut self,
         ty: &'ll Type,
         ptr: &'ll Value,
-        order: rustc_codegen_ssa::common::AtomicOrdering,
+        order: rustc_middle::ty::AtomicOrdering,
         size: Size,
     ) -> &'ll Value {
         unsafe {
@@ -851,7 +851,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         &mut self,
         val: &'ll Value,
         ptr: &'ll Value,
-        order: rustc_codegen_ssa::common::AtomicOrdering,
+        order: rustc_middle::ty::AtomicOrdering,
         size: Size,
     ) {
         debug!("Store {:?} -> {:?}", val, ptr);
@@ -1307,8 +1307,8 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         dst: &'ll Value,
         cmp: &'ll Value,
         src: &'ll Value,
-        order: rustc_codegen_ssa::common::AtomicOrdering,
-        failure_order: rustc_codegen_ssa::common::AtomicOrdering,
+        order: rustc_middle::ty::AtomicOrdering,
+        failure_order: rustc_middle::ty::AtomicOrdering,
         weak: bool,
     ) -> (&'ll Value, &'ll Value) {
         let weak = if weak { llvm::True } else { llvm::False };
@@ -1334,7 +1334,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         op: rustc_codegen_ssa::common::AtomicRmwBinOp,
         dst: &'ll Value,
         mut src: &'ll Value,
-        order: rustc_codegen_ssa::common::AtomicOrdering,
+        order: rustc_middle::ty::AtomicOrdering,
     ) -> &'ll Value {
         // The only RMW operation that LLVM supports on pointers is compare-exchange.
         let requires_cast_to_int = self.val_ty(src) == self.type_ptr()
@@ -1360,7 +1360,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
     fn atomic_fence(
         &mut self,
-        order: rustc_codegen_ssa::common::AtomicOrdering,
+        order: rustc_middle::ty::AtomicOrdering,
         scope: SynchronizationScope,
     ) {
         let single_threaded = match scope {
@@ -1452,9 +1452,15 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 impl<'ll> StaticBuilderMethods for Builder<'_, 'll, '_> {
     fn get_static(&mut self, def_id: DefId) -> &'ll Value {
         // Forward to the `get_static` method of `CodegenCx`
-        let s = self.cx().get_static(def_id);
-        // Cast to default address space if globals are in a different addrspace
-        self.cx().const_pointercast(s, self.type_ptr())
+        let global = self.cx().get_static(def_id);
+        if self.cx().tcx.is_thread_local_static(def_id) {
+            let pointer = self.call_intrinsic("llvm.threadlocal.address", &[global]);
+            // Cast to default address space if globals are in a different addrspace
+            self.pointercast(pointer, self.type_ptr())
+        } else {
+            // Cast to default address space if globals are in a different addrspace
+            self.cx().const_pointercast(global, self.type_ptr())
+        }
     }
 }
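
The new branch in `get_static` above fires for thread-local statics, routing their address through the `llvm.threadlocal.address` intrinsic (declared in context.rs below). A hedged usage sketch of the kind of Rust code that reaches this path; `#[thread_local]` is a nightly-only attribute and the static name is made up:

#![feature(thread_local)]

use std::sync::atomic::{AtomicU32, Ordering};

#[thread_local]
static COUNTER: AtomicU32 = AtomicU32::new(0);

fn bump() -> u32 {
    // Each access to COUNTER reaches `get_static` with
    // `tcx.is_thread_local_static(def_id) == true`, so codegen now asks
    // `llvm.threadlocal.address` for the per-thread address instead of
    // using the global value directly.
    COUNTER.fetch_add(1, Ordering::Relaxed) + 1
}
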
 
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index fe2f2027327..4234352c93a 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -411,7 +411,7 @@ impl<'ll> CodegenCx<'ll, '_> {
         g
     }
 
-    fn codegen_static_item(&self, def_id: DefId) {
+    fn codegen_static_item(&mut self, def_id: DefId) {
         unsafe {
             assert!(
                 llvm::LLVMGetInitializer(
@@ -557,6 +557,17 @@ impl<'ll> CodegenCx<'ll, '_> {
             }
         }
     }
+
+    /// Add a global value to a list to be stored in the `llvm.used` variable, an array of ptr.
+    pub(crate) fn add_used_global(&mut self, global: &'ll Value) {
+        self.used_statics.push(global);
+    }
+
+    /// Add a global value to a list to be stored in the `llvm.compiler.used` variable,
+    /// an array of ptr.
+    pub(crate) fn add_compiler_used_global(&mut self, global: &'ll Value) {
+        self.compiler_used_statics.push(global);
+    }
 }
 
 impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> {
@@ -571,18 +582,7 @@ impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> {
         self.const_pointercast(gv, self.type_ptr())
     }
 
-    fn codegen_static(&self, def_id: DefId) {
+    fn codegen_static(&mut self, def_id: DefId) {
         self.codegen_static_item(def_id)
     }
-
-    /// Add a global value to a list to be stored in the `llvm.used` variable, an array of ptr.
-    fn add_used_global(&self, global: &'ll Value) {
-        self.used_statics.borrow_mut().push(global);
-    }
-
-    /// Add a global value to a list to be stored in the `llvm.compiler.used` variable,
-    /// an array of ptr.
-    fn add_compiler_used_global(&self, global: &'ll Value) {
-        self.compiler_used_statics.borrow_mut().push(global);
-    }
 }
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index b0d8e11d1fb..8d6e1d8941b 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -2,7 +2,7 @@ use std::borrow::Borrow;
 use std::cell::{Cell, RefCell};
 use std::ffi::{CStr, c_char, c_uint};
 use std::marker::PhantomData;
-use std::ops::Deref;
+use std::ops::{Deref, DerefMut};
 use std::str;
 
 use rustc_abi::{HasDataLayout, Size, TargetDataLayout, VariantIdx};
@@ -77,6 +77,13 @@ impl<'ll, T: Borrow<SCx<'ll>>> Deref for GenericCx<'ll, T> {
     }
 }
 
+impl<'ll, T: Borrow<SCx<'ll>>> DerefMut for GenericCx<'ll, T> {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
 pub(crate) type SimpleCx<'ll> = GenericCx<'ll, SCx<'ll>>;
 
 /// There is one `CodegenCx` per codegen unit. Each one has its own LLVM
@@ -110,11 +117,11 @@ pub(crate) struct FullCx<'ll, 'tcx> {
 
     /// Statics that will be placed in the llvm.used variable
     /// See <https://llvm.org/docs/LangRef.html#the-llvm-used-global-variable> for details
-    pub used_statics: RefCell<Vec<&'ll Value>>,
+    pub used_statics: Vec<&'ll Value>,
 
     /// Statics that will be placed in the llvm.compiler.used variable
     /// See <https://llvm.org/docs/LangRef.html#the-llvm-compiler-used-global-variable> for details
-    pub compiler_used_statics: RefCell<Vec<&'ll Value>>,
+    pub compiler_used_statics: Vec<&'ll Value>,
 
     /// Mapping of non-scalar types to llvm types.
     pub type_lowering: RefCell<FxHashMap<(Ty<'tcx>, Option<VariantIdx>), &'ll Type>>,
@@ -606,8 +613,8 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
                 const_str_cache: Default::default(),
                 const_globals: Default::default(),
                 statics_to_rauw: RefCell::new(Vec::new()),
-                used_statics: RefCell::new(Vec::new()),
-                compiler_used_statics: RefCell::new(Vec::new()),
+                used_statics: Vec::new(),
+                compiler_used_statics: Vec::new(),
                 type_lowering: Default::default(),
                 scalar_lltypes: Default::default(),
                 coverage_cx,
@@ -801,10 +808,6 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         self.tcx.sess
     }
 
-    fn codegen_unit(&self) -> &'tcx CodegenUnit<'tcx> {
-        self.codegen_unit
-    }
-
     fn set_frame_pointer_type(&self, llfn: &'ll Value) {
         if let Some(attr) = attributes::frame_pointer_type_attr(self) {
             attributes::apply_to_llfn(llfn, llvm::AttributePlace::Function, &[attr]);
@@ -1240,6 +1243,7 @@ impl<'ll> CodegenCx<'ll, '_> {
         }
 
         ifn!("llvm.ptrmask", fn(ptr, t_isize) -> ptr);
+        ifn!("llvm.threadlocal.address", fn(ptr) -> ptr);
 
         None
     }
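
The `DerefMut` impl and the switch from `RefCell<Vec<_>>` to plain `Vec` above go together: once `predefine_*`, `codegen_static`, and the coverage finalizers take `&mut self`, the used-statics lists no longer need interior mutability. A toy model of the pattern, with illustrative names only:

use std::ops::{Deref, DerefMut};

struct Inner {
    used_statics: Vec<u32>,
}

struct Cx(Inner);

impl Deref for Cx {
    type Target = Inner;
    fn deref(&self) -> &Inner {
        &self.0
    }
}

impl DerefMut for Cx {
    // Mirrors the new `DerefMut for GenericCx`: mutable access flows through
    // the wrapper instead of through `RefCell::borrow_mut`.
    fn deref_mut(&mut self) -> &mut Inner {
        &mut self.0
    }
}

fn add_used_global(cx: &mut Cx, value: u32) {
    // With `&mut self` there is no runtime borrow tracking that could panic.
    cx.used_statics.push(value);
}

fn main() {
    let mut cx = Cx(Inner { used_statics: Vec::new() });
    add_used_global(&mut cx, 42);
    assert_eq!(cx.used_statics, [42]);
}
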
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index 55b1e728b70..a9be833a643 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -2,9 +2,7 @@ use std::sync::Arc;
 
 use itertools::Itertools;
 use rustc_abi::Align;
-use rustc_codegen_ssa::traits::{
-    BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
-};
+use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, ConstCodegenMethods};
 use rustc_data_structures::fx::FxIndexMap;
 use rustc_index::IndexVec;
 use rustc_middle::ty::TyCtxt;
@@ -27,7 +25,7 @@ mod unused;
 ///
 /// Those sections are then read and understood by LLVM's `llvm-cov` tool,
 /// which is distributed in the `llvm-tools` rustup component.
-pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
+pub(crate) fn finalize(cx: &mut CodegenCx<'_, '_>) {
     let tcx = cx.tcx;
 
     // Ensure that LLVM is using a version of the coverage mapping format that
@@ -62,6 +60,7 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
         .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name)
         .filter_map(|instance| prepare_covfun_record(tcx, instance, true))
         .collect::<Vec<_>>();
+    drop(instances_used);
 
     // In a single designated CGU, also prepare covfun records for functions
     // in this crate that were instrumented for coverage, but are unused.
@@ -206,7 +205,7 @@ impl VirtualFileMapping {
 /// Generates the contents of the covmap record for this CGU, which mostly
 /// consists of a header and a list of filenames. The record is then stored
 /// as a global variable in the `__llvm_covmap` section.
-fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_buffer: &[u8]) {
+fn generate_covmap_record<'ll>(cx: &mut CodegenCx<'ll, '_>, version: u32, filenames_buffer: &[u8]) {
     // A covmap record consists of four target-endian u32 values, followed by
     // the encoded filenames table. Two of the header fields are unused in
     // modern versions of the LLVM coverage mapping format, and are always 0.
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
index 7bdbc685952..b704cf2b1cd 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -8,9 +8,7 @@ use std::ffi::CString;
 use std::sync::Arc;
 
 use rustc_abi::Align;
-use rustc_codegen_ssa::traits::{
-    BaseTypeCodegenMethods as _, ConstCodegenMethods, StaticCodegenMethods,
-};
+use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods as _, ConstCodegenMethods};
 use rustc_middle::mir::coverage::{
     BasicCoverageBlock, CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping,
     MappingKind, Op,
@@ -181,7 +179,7 @@ fn fill_region_tables<'tcx>(
 /// contains the function's coverage mapping data. The record is then stored
 /// as a global variable in the `__llvm_covfun` section.
 pub(crate) fn generate_covfun_record<'tcx>(
-    cx: &CodegenCx<'_, 'tcx>,
+    cx: &mut CodegenCx<'_, 'tcx>,
     global_file_table: &GlobalFileTable,
     covfun: &CovfunRecord<'tcx>,
 ) {
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
index ea7f581a3cb..eefbd7cf6c4 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
@@ -56,7 +56,7 @@ impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> {
 }
 
 impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
-    pub(crate) fn coverageinfo_finalize(&self) {
+    pub(crate) fn coverageinfo_finalize(&mut self) {
         mapgen::finalize(self)
     }
 
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 5736314b96a..fd376ea8d80 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -189,13 +189,13 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
         back::lto::run_thin(cgcx, modules, cached_modules)
     }
-    unsafe fn optimize(
+    fn optimize(
         cgcx: &CodegenContext<Self>,
         dcx: DiagCtxtHandle<'_>,
         module: &mut ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> Result<(), FatalError> {
-        unsafe { back::write::optimize(cgcx, dcx, module, config) }
+        back::write::optimize(cgcx, dcx, module, config)
     }
     fn optimize_fat(
         cgcx: &CodegenContext<Self>,
@@ -205,19 +205,19 @@ impl WriteBackendMethods for LlvmCodegenBackend {
         let dcx = dcx.handle();
         back::lto::run_pass_manager(cgcx, dcx, module, false)
     }
-    unsafe fn optimize_thin(
+    fn optimize_thin(
         cgcx: &CodegenContext<Self>,
         thin: ThinModule<Self>,
     ) -> Result<ModuleCodegen<Self::Module>, FatalError> {
-        unsafe { back::lto::optimize_thin_module(thin, cgcx) }
+        back::lto::optimize_thin_module(thin, cgcx)
     }
-    unsafe fn codegen(
+    fn codegen(
         cgcx: &CodegenContext<Self>,
         dcx: DiagCtxtHandle<'_>,
         module: ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> Result<CompiledModule, FatalError> {
-        unsafe { back::write::codegen(cgcx, dcx, module, config) }
+        back::write::codegen(cgcx, dcx, module, config)
     }
     fn prepare_thin(
         module: ModuleCodegen<Self::Module>,
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 67a66e6ec79..e27fbf94f34 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -426,14 +426,14 @@ pub(crate) enum AtomicOrdering {
 }
 
 impl AtomicOrdering {
-    pub(crate) fn from_generic(ao: rustc_codegen_ssa::common::AtomicOrdering) -> Self {
-        use rustc_codegen_ssa::common::AtomicOrdering as Common;
+    pub(crate) fn from_generic(ao: rustc_middle::ty::AtomicOrdering) -> Self {
+        use rustc_middle::ty::AtomicOrdering as Common;
         match ao {
             Common::Relaxed => Self::Monotonic,
             Common::Acquire => Self::Acquire,
             Common::Release => Self::Release,
-            Common::AcquireRelease => Self::AcquireRelease,
-            Common::SequentiallyConsistent => Self::SequentiallyConsistent,
+            Common::AcqRel => Self::AcquireRelease,
+            Common::SeqCst => Self::SequentiallyConsistent,
         }
     }
 }
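
The variant renames above (`AcqRel`, `SeqCst`) track the switch from `rustc_codegen_ssa::common::AtomicOrdering` to `rustc_middle::ty::AtomicOrdering`, whose variants follow the surface-level `std::sync::atomic::Ordering` names. A standalone sketch of the same mapping; the LLVM-side enum here is a local stand-in, not the FFI type above:

use std::sync::atomic::Ordering;

#[derive(Debug, PartialEq)]
enum LlvmOrdering {
    Monotonic,
    Acquire,
    Release,
    AcquireRelease,
    SequentiallyConsistent,
}

fn to_llvm(o: Ordering) -> LlvmOrdering {
    match o {
        Ordering::Relaxed => LlvmOrdering::Monotonic,
        Ordering::Acquire => LlvmOrdering::Acquire,
        Ordering::Release => LlvmOrdering::Release,
        Ordering::AcqRel => LlvmOrdering::AcquireRelease,
        Ordering::SeqCst => LlvmOrdering::SequentiallyConsistent,
        // `Ordering` is #[non_exhaustive]; no other variants exist today.
        _ => unreachable!(),
    }
}
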
diff --git a/compiler/rustc_codegen_llvm/src/mono_item.rs b/compiler/rustc_codegen_llvm/src/mono_item.rs
index fdf62a08065..3f38e1e191b 100644
--- a/compiler/rustc_codegen_llvm/src/mono_item.rs
+++ b/compiler/rustc_codegen_llvm/src/mono_item.rs
@@ -16,7 +16,7 @@ use crate::{base, llvm};
 
 impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
     fn predefine_static(
-        &self,
+        &mut self,
         def_id: DefId,
         linkage: Linkage,
         visibility: Visibility,
@@ -44,7 +44,7 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
     }
 
     fn predefine_fn(
-        &self,
+        &mut self,
         instance: Instance<'tcx>,
         linkage: Linkage,
         visibility: Visibility,
diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
index b91b6efed45..8eedb5392b5 100644
--- a/compiler/rustc_codegen_llvm/src/va_arg.rs
+++ b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -1,7 +1,10 @@
-use rustc_abi::{Align, Endian, HasDataLayout, Size};
+use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
+use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::OperandRef;
-use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
+use rustc_codegen_ssa::traits::{
+    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
+};
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
 
@@ -303,6 +306,313 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     bx.load(val_type, val_addr, layout.align.abi)
 }
 
+fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    list: OperandRef<'tcx, &'ll Value>,
+    target_ty: Ty<'tcx>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    // Implementation of the System V x86_64 ABI calling convention for va_args, see
+    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
+    // based on the one in clang.
+
+    // We're able to take some shortcuts because the return type of `va_arg` must implement the
+    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.
+
+    // typedef struct __va_list_tag {
+    //     unsigned int gp_offset;
+    //     unsigned int fp_offset;
+    //     void *overflow_arg_area;
+    //     void *reg_save_area;
+    // } va_list[1];
+    let va_list_addr = list.immediate();
+
+    // Peel off any newtype wrappers.
+    //
+    // The "C" ABI does not unwrap newtypes (see `ReprOptions::inhibit_newtype_abi_optimization`).
+    // Here, we do actually want the unwrapped representation, because that is how LLVM/Clang
+    // pass such types to variadic functions.
+    //
+    // An example of a type that must be unwrapped is `Foo` below. Without the unwrapping, it has
+    // `BackendRepr::Memory`, but we need it to be `BackendRepr::Scalar` to generate correct code.
+    //
+    // ```
+    // #[repr(C)]
+    // struct Empty;
+    //
+    // #[repr(C)]
+    // struct Foo([Empty; 8], i32);
+    // ```
+    let layout = {
+        let mut layout = bx.cx.layout_of(target_ty);
+
+        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
+            layout = inner;
+        }
+
+        layout
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
+    // in the registers. If not go to step 7.
+
+    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
+    // general purpose registers needed to pass type and num_fp to hold
+    // the number of floating point registers needed.
+
+    let mut num_gp_registers = 0;
+    let mut num_fp_registers = 0;
+
+    let mut registers_for_primitive = |p| match p {
+        Primitive::Int(integer, _is_signed) => {
+            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
+        }
+        Primitive::Float(float) => {
+            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
+        }
+        Primitive::Pointer(_) => {
+            num_gp_registers += 1;
+        }
+    };
+
+    match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => {
+            registers_for_primitive(scalar.primitive());
+        }
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            registers_for_primitive(scalar1.primitive());
+            registers_for_primitive(scalar2.primitive());
+        }
+        BackendRepr::SimdVector { .. } => {
+            // Unreachable because no type implementing `VaArgSafe` uses a SIMD vector `BackendRepr`.
+            unreachable!(
+                "No x86-64 SysV va_arg implementation for {:?}",
+                layout.layout.backend_repr()
+            )
+        }
+        BackendRepr::Memory { .. } => {
+            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
+        }
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
+    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
+    // l->fp_offset > 176 - num_fp * 16 go to step 7.
+
+    let unsigned_int_offset = 4;
+    let ptr_offset = 8;
+    let gp_offset_ptr = va_list_addr;
+    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+
+    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
+    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());
+
+    let mut use_regs = bx.const_bool(false);
+
+    if num_gp_registers > 0 {
+        let max_offset_val = 48u32 - num_gp_registers * 8;
+        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
+        use_regs = fits_in_gp;
+    }
+
+    if num_fp_registers > 0 {
+        let max_offset_val = 176u32 - num_fp_registers * 16;
+        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
+        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
+    }
+
+    let in_reg = bx.append_sibling_block("va_arg.in_reg");
+    let in_mem = bx.append_sibling_block("va_arg.in_mem");
+    let end = bx.append_sibling_block("va_arg.end");
+
+    bx.cond_br(use_regs, in_reg, in_mem);
+
+    // Emit code to load the value if it was passed in a register.
+    bx.switch_to_block(in_reg);
+
+    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
+    // an offset of l->gp_offset and/or l->fp_offset. This may require
+    // copying to a temporary location in case the parameter is passed
+    // in different register classes or requires an alignment greater
+    // than 8 for general purpose registers and 16 for XMM registers.
+    //
+    // FIXME(llvm): This really results in shameful code when we end up needing to
+    // collect arguments from different places; often what should result in a
+    // simple assembling of a structure from scattered addresses has many more
+    // loads than necessary. Can we clean this up?
+    let reg_save_area_ptr =
+        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
+    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);
+
+    let reg_addr = match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => match scalar.primitive() {
+            Primitive::Int(_, _) | Primitive::Pointer(_) => {
+                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                // Copy into a temporary if the type is more aligned than the register save area.
+                let gp_align = Align::from_bytes(8).unwrap();
+                copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
+            }
+            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
+        },
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
+            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);
+
+            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
+            let align_hi = layout.field(bx.cx, 1).layout.align().abi;
+
+            match (scalar1.primitive(), scalar2.primitive()) {
+                (Primitive::Float(_), Primitive::Float(_)) => {
+                    // SSE registers are spaced 16 bytes apart in the register save
+                    // area, so we need to collect the two eightbytes together.
+                    // The ABI isn't explicit about this, but it seems reasonable
+                    // to assume that the slots are 16-byte aligned, since the stack is
+                    // naturally 16-byte aligned and the prologue is expected to store
+                    // all the SSE registers to the RSA.
+                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));
+
+                    let align = layout.layout.align().abi;
+                    let tmp = bx.alloca(layout.layout.size(), align);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align);
+                    bx.store(reg_hi, field1, align);
+
+                    tmp
+                }
+                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
+                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+
+                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
+                        Primitive::Float(_) => (fp_addr, gp_addr),
+                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
+                    };
+
+                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align_lo);
+                    bx.store(reg_hi, field1, align_hi);
+
+                    tmp
+                }
+                (_, _) => {
+                    // Two integer/pointer values are just contiguous in memory.
+                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                    // Copy into a temporary if the type is more aligned than the register save area.
+                    let gp_align = Align::from_bytes(8).unwrap();
+                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
+                }
+            }
+        }
+        // The previous match on `BackendRepr` already branched away for these cases,
+        // so control flow cannot reach here.
+        BackendRepr::SimdVector { .. } | BackendRepr::Memory { .. } => unreachable!(),
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 5. Set:
+    // l->gp_offset = l->gp_offset + num_gp * 8
+    if num_gp_registers > 0 {
+        let offset = bx.const_u32(num_gp_registers * 8);
+        let sum = bx.add(gp_offset_v, offset);
+        // An alignment of 8 because `__va_list_tag` is 8-aligned and this is its first field.
+        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
+    }
+
+    // l->fp_offset = l->fp_offset + num_fp * 16.
+    if num_fp_registers > 0 {
+        let offset = bx.const_u32(num_fp_registers * 16);
+        let sum = bx.add(fp_offset_v, offset);
+        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
+    }
+
+    bx.br(end);
+
+    bx.switch_to_block(in_mem);
+    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+    bx.br(end);
+
+    bx.switch_to_block(end);
+
+    let val_type = layout.llvm_type(bx);
+    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
+
+    bx.load(val_type, val_addr, layout.align.abi)
+}
+
+/// Copy into a temporary if the type is more aligned than the register save area.
+fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    reg_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+    src_align: Align,
+) -> &'ll Value {
+    if layout.layout.align.abi > src_align {
+        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+        bx.memcpy(
+            tmp,
+            layout.layout.align.abi,
+            reg_addr,
+            src_align,
+            bx.const_u32(layout.layout.size().bytes() as u32),
+            MemFlags::empty(),
+        );
+        tmp
+    } else {
+        reg_addr
+    }
+}
+
+fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    va_list_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));
+
+    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
+    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
+    // byte boundary if alignment needed by type exceeds 8 byte boundary.
+    // It isn't stated explicitly in the standard, but in practice we use
+    // alignment greater than 16 where necessary.
+    if layout.layout.align.abi.bytes() > 8 {
+        unreachable!("all instances of VaArgSafe have an alignment <= 8");
+    }
+
+    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
+    let mem_addr = overflow_arg_area_v;
+
+    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
+    // l->overflow_arg_area + sizeof(type).
+    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
+    // an 8 byte boundary.
+    let size_in_bytes = layout.layout.size().bytes();
+    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
+    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
+    bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);
+
+    mem_addr
+}
+
 fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx: &mut Builder<'_, 'll, 'tcx>,
     list: OperandRef<'tcx, &'ll Value>,
@@ -334,8 +644,7 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     // (*va).va_ndx
     let va_reg_offset = 4;
     let va_ndx_offset = va_reg_offset + 4;
-    let offset_ptr =
-        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);
+    let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));
 
     let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
     let offset = round_up_to_alignment(bx, offset, layout.align.abi);
@@ -356,11 +665,10 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
 
     // (*va).va_reg
-    let regsave_area_ptr =
-        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
+    let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
     let regsave_area =
         bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-    let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
+    let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
     bx.br(end);
 
     bx.switch_to_block(from_stack);
@@ -381,9 +689,9 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
 
     // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
-    let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
+    let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
     let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-    let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
+    let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
     bx.br(end);
 
     bx.switch_to_block(end);
@@ -449,6 +757,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
                 AllowHigherAlign::No,
             )
         }
+        // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
+        "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
         "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
         // For all other architecture/OS combinations fall back to using
         // the LLVM va_arg instruction.
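
For reference, the register-budget logic that `emit_x86_64_sysv64_va_arg` emits as IR (Steps 2-3 in the comments above) can be written down as plain arithmetic. A standalone sketch, not compiler code: the SysV register save area has six 8-byte general-purpose slots (offsets 0..48) followed by the SSE slots spaced 16 bytes apart (offsets 48..176).

fn fits_in_registers(gp_offset: u32, fp_offset: u32, num_gp: u32, num_fp: u32) -> bool {
    // Mirrors how `use_regs` is computed before the `cond_br` above.
    let mut use_regs = false;
    if num_gp > 0 {
        use_regs = gp_offset <= 48 - num_gp * 8;
    }
    if num_fp > 0 {
        let fits_in_fp = fp_offset <= 176 - num_fp * 16;
        use_regs = if num_gp > 0 { use_regs && fits_in_fp } else { fits_in_fp };
    }
    use_regs
}

fn main() {
    // With no GP slots consumed yet (gp_offset == 0), an i64 (one GP register)
    // is fetched from the register save area (the `in_reg` branch)...
    assert!(fits_in_registers(0, 48, 1, 0));
    // ...but once all six GP slots are used (gp_offset == 48), the same value
    // comes from overflow_arg_area instead (the `in_mem` branch).
    assert!(!fits_in_registers(48, 48, 1, 0));
}

On the Rust side, the values that reach this lowering come from nightly-only `c_variadic` functions, where each `VaListImpl::arg::<T>()` call is compiled through `emit_va_arg`.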