author    Guillaume Gomez <guillaume.gomez@huawei.com>  2024-07-10 12:44:23 +0200
committer Guillaume Gomez <guillaume.gomez@huawei.com>  2024-07-10 12:44:23 +0200
commit    7cbe50e2098c35fda06433cd36bbced941607317 (patch)
tree      5f93154e463e7258902781d746195519e20a9fc6 /compiler/rustc_codegen_gcc/src
parent    649feb9c1a3c56650a4b6fa638b23103cbcd0dcd (diff)
parent    98ed962c7d3eebe12c97588e61245273d265e72f (diff)
Merge commit '98ed962c7d3eebe12c97588e61245273d265e72f' into master
Diffstat (limited to 'compiler/rustc_codegen_gcc/src')
-rw-r--r--  compiler/rustc_codegen_gcc/src/abi.rs             |  15
-rw-r--r--  compiler/rustc_codegen_gcc/src/asm.rs             |  61
-rw-r--r--  compiler/rustc_codegen_gcc/src/attributes.rs      |  14
-rw-r--r--  compiler/rustc_codegen_gcc/src/back/lto.rs        | 404
-rw-r--r--  compiler/rustc_codegen_gcc/src/back/write.rs      |  20
-rw-r--r--  compiler/rustc_codegen_gcc/src/base.rs            |  32
-rw-r--r--  compiler/rustc_codegen_gcc/src/builder.rs         | 126
-rw-r--r--  compiler/rustc_codegen_gcc/src/callee.rs          |   4
-rw-r--r--  compiler/rustc_codegen_gcc/src/common.rs          |  11
-rw-r--r--  compiler/rustc_codegen_gcc/src/consts.rs          |  94
-rw-r--r--  compiler/rustc_codegen_gcc/src/context.rs         |  37
-rw-r--r--  compiler/rustc_codegen_gcc/src/debuginfo.rs       |  29
-rw-r--r--  compiler/rustc_codegen_gcc/src/declare.rs         |  16
-rw-r--r--  compiler/rustc_codegen_gcc/src/int.rs             |  86
-rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/archs.rs |  33
-rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs  |  17
-rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/mod.rs   |  69
-rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/simd.rs  | 195
-rw-r--r--  compiler/rustc_codegen_gcc/src/lib.rs             |  95
-rw-r--r--  compiler/rustc_codegen_gcc/src/mono_item.rs       |   2
-rw-r--r--  compiler/rustc_codegen_gcc/src/type_.rs           |  82
-rw-r--r--  compiler/rustc_codegen_gcc/src/type_of.rs         |  12
22 files changed, 1137 insertions(+), 317 deletions(-)
diff --git a/compiler/rustc_codegen_gcc/src/abi.rs b/compiler/rustc_codegen_gcc/src/abi.rs
index b098594dbcc..166dd080cf2 100644
--- a/compiler/rustc_codegen_gcc/src/abi.rs
+++ b/compiler/rustc_codegen_gcc/src/abi.rs
@@ -4,6 +4,7 @@ use gccjit::{ToLValue, ToRValue, Type};
 use rustc_codegen_ssa::traits::{AbiBuilderMethods, BaseTypeMethods};
 use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::bug;
+use rustc_middle::ty::layout::LayoutOf;
 use rustc_middle::ty::Ty;
 #[cfg(feature = "master")]
 use rustc_session::config;
@@ -184,9 +185,17 @@ impl<'gcc, 'tcx> FnAbiGccExt<'gcc, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 }
                 PassMode::Indirect { attrs, meta_attrs: Some(meta_attrs), on_stack } => {
                     assert!(!on_stack);
-                    let ty =
-                        apply_attrs(cx.type_ptr_to(arg.memory_ty(cx)), &attrs, argument_tys.len());
-                    apply_attrs(ty, &meta_attrs, argument_tys.len())
+                    // Construct the type of a (wide) pointer to `ty`, and pass its two fields.
+                    // Any two ABI-compatible unsized types have the same metadata type and
+                    // moreover the same metadata value leads to the same dynamic size and
+                    // alignment, so this respects ABI compatibility.
+                    let ptr_ty = Ty::new_mut_ptr(cx.tcx, arg.layout.ty);
+                    let ptr_layout = cx.layout_of(ptr_ty);
+                    let typ1 = ptr_layout.scalar_pair_element_gcc_type(cx, 0);
+                    let typ2 = ptr_layout.scalar_pair_element_gcc_type(cx, 1);
+                    argument_tys.push(apply_attrs(typ1, &attrs, argument_tys.len()));
+                    argument_tys.push(apply_attrs(typ2, &meta_attrs, argument_tys.len()));
+                    continue;
                 }
             };
             argument_tys.push(arg_ty);
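
The hunk above replaces the single pointer argument for an unsized indirect
argument with the two fields of a wide pointer. A minimal runnable sketch of
the representation this relies on (plain Rust, not part of the patch):

```rust
// A wide pointer such as `*mut [u8]` is a scalar pair: a data pointer plus
// metadata (for slices, the length). The ABI change above passes these two
// scalars as two separate arguments rather than one indirect pointer.
fn main() {
    let slice: &[u8] = &[1, 2, 3];
    let data_ptr = slice.as_ptr(); // first scalar of the pair
    let len = slice.len();         // second scalar: the metadata
    println!("{:p} {}", data_ptr, len);
    // A wide raw pointer is exactly two pointer-sized words.
    assert_eq!(
        std::mem::size_of::<*mut [u8]>(),
        2 * std::mem::size_of::<usize>()
    );
}
```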
diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs
index 06b14a1f118..aa485846cd4 100644
--- a/compiler/rustc_codegen_gcc/src/asm.rs
+++ b/compiler/rustc_codegen_gcc/src/asm.rs
@@ -115,7 +115,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
         span: &[Span],
         instance: Instance<'_>,
         dest: Option<Self::BasicBlock>,
-        _catch_funclet: Option<(Self::BasicBlock, Option<&Self::Funclet>)>,
+        _dest_catch_funclet: Option<(Self::BasicBlock, Option<&Self::Funclet>)>,
     ) {
         if options.contains(InlineAsmOptions::MAY_UNWIND) {
             self.sess().dcx().create_err(UnwindingInlineAsm { span: span[0] }).emit();
@@ -485,9 +485,8 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                         }
 
                         InlineAsmOperandRef::Label { label } => {
-                            let label_gcc_index = labels.iter()
-                                .position(|&l| l == label)
-                                .expect("wrong rust index");
+                            let label_gcc_index =
+                                labels.iter().position(|&l| l == label).expect("wrong rust index");
                             let gcc_index = label_gcc_index + outputs.len() + inputs.len();
                             push_to_template(Some('l'), gcc_index);
                         }
@@ -538,9 +537,8 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
         }
         if dest.is_none() && options.contains(InlineAsmOptions::NORETURN) {
             let builtin_unreachable = self.context.get_builtin_function("__builtin_unreachable");
-            let builtin_unreachable: RValue<'gcc> = unsafe {
-                std::mem::transmute(builtin_unreachable)
-            };
+            let builtin_unreachable: RValue<'gcc> =
+                unsafe { std::mem::transmute(builtin_unreachable) };
             self.call(self.type_void(), None, None, builtin_unreachable, &[], None, None);
         }
 
@@ -696,10 +694,12 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister {
 fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegClass) -> Type<'gcc> {
     match reg {
         InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::reg) => cx.type_i32(),
-        InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => unimplemented!(),
         InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg)
         | InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16) => {
-            unimplemented!()
+            cx.type_vector(cx.type_i64(), 2)
+        }
+        InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => {
+            unreachable!("clobber-only")
         }
         InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg)
@@ -710,21 +710,13 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
         InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg)
         | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low8)
         | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low4) => {
-            unimplemented!()
+            cx.type_vector(cx.type_i64(), 2)
         }
-        InlineAsmRegClass::Avr(_) => unimplemented!(),
-        InlineAsmRegClass::Bpf(_) => unimplemented!(),
         InlineAsmRegClass::Hexagon(HexagonInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::freg) => cx.type_f32(),
-        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg) => cx.type_i32(),
-        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_addr) => cx.type_i32(),
-        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_data) => cx.type_i32(),
-        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::reg) => cx.type_i32(),
-        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::freg) => cx.type_f32(),
         InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::Mips(MipsInlineAsmRegClass::freg) => cx.type_f32(),
-        InlineAsmRegClass::Msp430(_) => unimplemented!(),
         InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg16) => cx.type_i16(),
         InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg32) => cx.type_i32(),
         InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg64) => cx.type_i64(),
@@ -737,26 +729,43 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
         }
         InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => cx.type_f32(),
-        InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::vreg) => cx.type_f32(),
+        InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::vreg) => {
+            unreachable!("clobber-only")
+        }
         InlineAsmRegClass::X86(X86InlineAsmRegClass::reg)
         | InlineAsmRegClass::X86(X86InlineAsmRegClass::reg_abcd) => cx.type_i32(),
         InlineAsmRegClass::X86(X86InlineAsmRegClass::reg_byte) => cx.type_i8(),
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::mmx_reg) => unimplemented!(),
         InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg)
         | InlineAsmRegClass::X86(X86InlineAsmRegClass::ymm_reg)
         | InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => cx.type_f32(),
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg) => unimplemented!(),
         InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => cx.type_i16(),
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0) => cx.type_i16(),
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::tmm_reg) => unimplemented!(),
-        InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => cx.type_i32(),
-        InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => {
-            bug!("LLVM backend does not support SPIR-V")
+        InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg)
+        | InlineAsmRegClass::X86(X86InlineAsmRegClass::mmx_reg)
+        | InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0)
+        | InlineAsmRegClass::X86(X86InlineAsmRegClass::tmm_reg) => {
+            unreachable!("clobber-only")
         }
+        InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => cx.type_i32(),
+        InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::reg) => cx.type_i64(),
+        InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::wreg) => cx.type_i32(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg) => cx.type_i8(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_upper) => cx.type_i8(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_pair) => cx.type_i16(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_iw) => cx.type_i16(),
+        InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_ptr) => cx.type_i16(),
         InlineAsmRegClass::S390x(
             S390xInlineAsmRegClass::reg | S390xInlineAsmRegClass::reg_addr,
         ) => cx.type_i32(),
         InlineAsmRegClass::S390x(S390xInlineAsmRegClass::freg) => cx.type_f64(),
+        InlineAsmRegClass::Msp430(Msp430InlineAsmRegClass::reg) => cx.type_i16(),
+        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg) => cx.type_i32(),
+        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_addr) => cx.type_i32(),
+        InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_data) => cx.type_i32(),
+        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::reg) => cx.type_i32(),
+        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::freg) => cx.type_f32(),
+        InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => {
+            bug!("GCC backend does not support SPIR-V")
+        }
         InlineAsmRegClass::Err => unreachable!(),
     }
 }
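
For context on the `unreachable!("clobber-only")` arms above: `dummy_output_type`
supplies a placeholder type for discarded-but-typed outputs such as `out(reg) _`,
while clobber-only register classes can never appear as typed operands in `asm!`.
A hedged x86_64 illustration (standard Rust inline asm, not taken from the patch):

```rust
#[cfg(target_arch = "x86_64")]
fn asm_output_vs_clobber() {
    unsafe {
        // Discarded output: still a typed operand, so the backend picks a
        // placeholder type for the `reg` class (`cx.type_i32()` above).
        std::arch::asm!("mov {0}, 42", out(reg) _);
        // Explicit-register clobber: marks xmm0 as dirty without creating a
        // typed operand; clobber-only classes can only ever be used this way.
        std::arch::asm!("xorps xmm0, xmm0", out("xmm0") _);
    }
}
```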
diff --git a/compiler/rustc_codegen_gcc/src/attributes.rs b/compiler/rustc_codegen_gcc/src/attributes.rs
index 8602566ab8f..27f21107eda 100644
--- a/compiler/rustc_codegen_gcc/src/attributes.rs
+++ b/compiler/rustc_codegen_gcc/src/attributes.rs
@@ -92,7 +92,7 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
     let mut function_features = function_features
         .iter()
         .flat_map(|feat| to_gcc_features(cx.tcx.sess, feat).into_iter())
-        .chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x {
+        .chain(codegen_fn_attrs.instruction_set.iter().map(|x| match *x {
             InstructionSetAttr::ArmA32 => "-thumb-mode", // TODO(antoyo): support removing feature.
             InstructionSetAttr::ArmT32 => "thumb-mode",
         }))
@@ -118,8 +118,8 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
 
             if feature.starts_with('-') {
                 Some(format!("no{}", feature))
-            } else if feature.starts_with('+') {
-                Some(feature[1..].to_string())
+            } else if let Some(stripped) = feature.strip_prefix('+') {
+                Some(stripped.to_string())
             } else {
                 Some(feature.to_string())
             }
@@ -128,6 +128,12 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
         .join(",");
     if !target_features.is_empty() {
         #[cfg(feature = "master")]
-        func.add_attribute(FnAttribute::Target(&target_features));
+        match cx.sess().target.arch.as_ref() {
+            "x86" | "x86_64" | "powerpc" => {
+                func.add_attribute(FnAttribute::Target(&target_features))
+            }
+            // The target attribute is not supported on other targets in GCC.
+            _ => (),
+        }
     }
 }
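
The feature-string mapping applied in this hunk is easy to restate standalone;
a sketch of the same transformation (hypothetical helper name, not from the
patch):

```rust
// Maps rustc-style feature strings onto GCC's target-attribute spelling:
// "-feat" becomes "no-feat", "+feat" loses its '+', bare names pass through.
fn to_gcc_feature(feature: &str) -> String {
    if feature.starts_with('-') {
        format!("no{}", feature) // "no" + "-feat" == "no-feat"
    } else if let Some(stripped) = feature.strip_prefix('+') {
        stripped.to_string()
    } else {
        feature.to_string()
    }
}

fn main() {
    assert_eq!(to_gcc_feature("+avx2"), "avx2");
    assert_eq!(to_gcc_feature("-sse4.2"), "no-sse4.2");
    assert_eq!(to_gcc_feature("cmpxchg16b"), "cmpxchg16b");
}
```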
diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs
index ec70fbdddb0..6b2dbbbed67 100644
--- a/compiler/rustc_codegen_gcc/src/back/lto.rs
+++ b/compiler/rustc_codegen_gcc/src/back/lto.rs
@@ -16,13 +16,14 @@
 // /usr/bin/ld: warning: type of symbol `_RNvNvNvNtCs5JWOrf9uCus_5rayon11thread_pool19WORKER_THREAD_STATE7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
 // /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
 // /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization
-use std::ffi::CString;
+use std::ffi::{CStr, CString};
 use std::fs::{self, File};
 use std::path::{Path, PathBuf};
+use std::sync::Arc;
 
-use gccjit::OutputKind;
+use gccjit::{Context, OutputKind};
 use object::read::archive::ArchiveFile;
-use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule};
+use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
 use rustc_codegen_ssa::back::symbol_export;
 use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
 use rustc_codegen_ssa::traits::*;
@@ -30,6 +31,7 @@ use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
 use rustc_data_structures::memmap::Mmap;
 use rustc_errors::{DiagCtxtHandle, FatalError};
 use rustc_hir::def_id::LOCAL_CRATE;
+use rustc_middle::bug;
 use rustc_middle::dep_graph::WorkProduct;
 use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel};
 use rustc_session::config::{CrateType, Lto};
@@ -37,7 +39,7 @@ use tempfile::{tempdir, TempDir};
 
 use crate::back::write::save_temp_bitcode;
 use crate::errors::{DynamicLinkingWithLTO, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib};
-use crate::{to_gcc_opt_level, GccCodegenBackend, GccContext};
+use crate::{to_gcc_opt_level, GccCodegenBackend, GccContext, SyncContext};
 
 /// We keep track of the computed LTO cache keys from the previous
 /// session to determine which CGUs we can reuse.
@@ -128,8 +130,7 @@ fn prepare_lto(
             }
 
             let archive_data = unsafe {
-                Mmap::map(File::open(&path).expect("couldn't open rlib"))
-                    .expect("couldn't map rlib")
+                Mmap::map(File::open(path).expect("couldn't open rlib")).expect("couldn't map rlib")
             };
             let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib");
             let obj_files = archive
@@ -349,6 +350,395 @@ impl ModuleBuffer {
 
 impl ModuleBufferMethods for ModuleBuffer {
     fn data(&self) -> &[u8] {
-        unimplemented!("data not needed for GCC codegen");
+        &[]
     }
 }
+
+/// Performs thin LTO by running the necessary global analysis and returning two
+/// lists: one of the modules that need optimization, and another of the modules
+/// that can simply be copied over from the incremental compilation cache.
+pub(crate) fn run_thin(
+    cgcx: &CodegenContext<GccCodegenBackend>,
+    modules: Vec<(String, ThinBuffer)>,
+    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
+) -> Result<(Vec<LtoModuleCodegen<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> {
+    let dcx = cgcx.create_dcx();
+    let dcx = dcx.handle();
+    let lto_data = prepare_lto(cgcx, dcx)?;
+    /*let symbols_below_threshold =
+    symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();*/
+    if cgcx.opts.cg.linker_plugin_lto.enabled() {
+        unreachable!(
+            "We should never reach this case if the LTO step \
+                      is deferred to the linker"
+        );
+    }
+    thin_lto(
+        cgcx,
+        dcx,
+        modules,
+        lto_data.upstream_modules,
+        lto_data.tmp_path,
+        cached_modules, /*, &symbols_below_threshold*/
+    )
+}
+
+pub(crate) fn prepare_thin(
+    module: ModuleCodegen<GccContext>,
+    _emit_summary: bool,
+) -> (String, ThinBuffer) {
+    let name = module.name;
+    //let buffer = ThinBuffer::new(module.module_llvm.context, true, emit_summary);
+    let buffer = ThinBuffer::new(&module.module_llvm.context);
+    (name, buffer)
+}
+
+/// Prepares "thin" LTO to be run on these modules.
+///
+/// The general structure of ThinLTO is quite different from the structure of
+/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
+/// one giant LLVM module, and then we run more optimization passes over this
+/// big module after internalizing most symbols. Thin LTO, on the other hand,
+/// avoids this large bottleneck through more targeted optimization.
+///
+/// At a high level Thin LTO looks like:
+///
+///    1. Prepare a "summary" of each LLVM module in question which describes
+///       the values inside, cost of the values, etc.
+///    2. Merge the summaries of all modules in question into one "index"
+///    3. Perform some global analysis on this index
+///    4. For each module, use the index and analysis calculated previously to
+///       perform local transformations on the module, for example inlining
+///       small functions from other modules.
+///    5. Run thin-specific optimization passes over each module, and then code
+///       generate everything at the end.
+///
+/// The summary for each module is intended to be quite cheap, and the global
+/// index is relatively cheap to create as well. As a result, the goal of
+/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
+/// situations. For example one cheap optimization is that we can parallelize
+/// all codegen modules, easily making use of all the cores on a machine.
+///
+/// With all that in mind, the function here is designed at specifically just
+/// calculating the *index* for ThinLTO. This index will then be shared amongst
+/// all of the `LtoModuleCodegen` units returned below and destroyed once
+/// they all go out of scope.
+fn thin_lto(
+    cgcx: &CodegenContext<GccCodegenBackend>,
+    _dcx: DiagCtxtHandle<'_>,
+    modules: Vec<(String, ThinBuffer)>,
+    serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
+    tmp_path: TempDir,
+    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
+    //symbols_below_threshold: &[*const libc::c_char],
+) -> Result<(Vec<LtoModuleCodegen<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> {
+    let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
+    info!("going for that thin, thin LTO");
+
+    /*let green_modules: FxHashMap<_, _> =
+    cached_modules.iter().map(|(_, wp)| (wp.cgu_name.clone(), wp.clone())).collect();*/
+
+    let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
+    let mut thin_buffers = Vec::with_capacity(modules.len());
+    let mut module_names = Vec::with_capacity(full_scope_len);
+    //let mut thin_modules = Vec::with_capacity(full_scope_len);
+
+    for (i, (name, buffer)) in modules.into_iter().enumerate() {
+        info!("local module: {} - {}", i, name);
+        let cname = CString::new(name.as_bytes()).unwrap();
+        /*thin_modules.push(llvm::ThinLTOModule {
+            identifier: cname.as_ptr(),
+            data: buffer.data().as_ptr(),
+            len: buffer.data().len(),
+        });*/
+        thin_buffers.push(buffer);
+        module_names.push(cname);
+    }
+
+    // FIXME: All upstream crates are deserialized internally in the
+    //        function below to extract their summary and modules. Note that
+    //        unlike the loop above we *must* decode and/or read something
+    //        here as these are all just serialized files on disk. An
+    //        improvement, however, to make here would be to store the
+    //        module summary separately from the actual module itself. Right
+    //        now this is stored in one large bitcode file, and the entire
+    //        file is deflate-compressed. We could try to bypass some of the
+    //        decompression by storing the index uncompressed and only
+    //        lazily decompressing the bytecode if necessary.
+    //
+    //        Note that truly taking advantage of this optimization will
+    //        likely be further down the road. We'd have to implement
+    //        incremental ThinLTO first where we could actually avoid
+    //        looking at upstream module contents entirely sometimes (we must
+    //        still always look at the index unconditionally).
+    let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());
+
+    let cached_modules =
+        cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap()));
+
+    for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
+        info!("upstream or cached module {:?}", name);
+        /*thin_modules.push(llvm::ThinLTOModule {
+            identifier: name.as_ptr(),
+            data: module.data().as_ptr(),
+            len: module.data().len(),
+        });*/
+
+        match module {
+            SerializedModule::Local(_) => {
+                //let path = module_buffer.0.to_str().expect("path");
+                //let my_path = PathBuf::from(path);
+                //let exists = my_path.exists();
+                /*module.module_llvm.should_combine_object_files = true;
+                module
+                .module_llvm
+                .context
+                .add_driver_option(module_buffer.0.to_str().expect("path"));*/
+            }
+            SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
+            SerializedModule::FromUncompressedFile(_) => {
+                unimplemented!("from uncompressed file")
+            }
+        }
+
+        serialized.push(module);
+        module_names.push(name);
+    }
+
+    // Sanity check
+    //assert_eq!(thin_modules.len(), module_names.len());
+
+    // Delegate to the C++ bindings to create some data here. Once this is a
+    // tried-and-true interface we may wish to try to upstream some of this
+    // to LLVM itself, right now we reimplement a lot of what they do
+    // upstream...
+    /*let data = llvm::LLVMRustCreateThinLTOData(
+        thin_modules.as_ptr(),
+        thin_modules.len() as u32,
+        symbols_below_threshold.as_ptr(),
+        symbols_below_threshold.len() as u32,
+    )
+    .ok_or_else(|| write::llvm_err(dcx, LlvmError::PrepareThinLtoContext))?;
+    */
+
+    let data = ThinData; //(Arc::new(tmp_path))/*(data)*/;
+
+    info!("thin LTO data created");
+
+    /*let (key_map_path, prev_key_map, curr_key_map) =
+        if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
+            let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME);
+            // If the previous file was deleted, or we get an IO error
+            // reading the file, then we'll just use `None` as the
+            // prev_key_map, which will force the code to be recompiled.
+            let prev =
+                if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None };
+            let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names);
+            (Some(path), prev, curr)
+        }
+        else {
+            // If we don't compile incrementally, we don't need to load the
+            // import data from LLVM.
+            assert!(green_modules.is_empty());
+            let curr = ThinLTOKeysMap::default();
+            (None, None, curr)
+        };
+    info!("thin LTO cache key map loaded");
+    info!("prev_key_map: {:#?}", prev_key_map);
+    info!("curr_key_map: {:#?}", curr_key_map);*/
+
+    // Throw our data in an `Arc` as we'll be sharing it across threads. We
+    // also put all memory referenced by the C++ data (buffers, ids, etc)
+    // into the arc as well. After this we'll create a thin module
+    // codegen per module in this data.
+    let shared =
+        Arc::new(ThinShared { data, thin_buffers, serialized_modules: serialized, module_names });
+
+    let copy_jobs = vec![];
+    let mut opt_jobs = vec![];
+
+    info!("checking which modules can be-reused and which have to be re-optimized.");
+    for (module_index, module_name) in shared.module_names.iter().enumerate() {
+        let module_name = module_name_to_str(module_name);
+        /*if let (Some(prev_key_map), true) =
+            (prev_key_map.as_ref(), green_modules.contains_key(module_name))
+        {
+            assert!(cgcx.incr_comp_session_dir.is_some());
+
+            // If a module exists in both the current and the previous session,
+            // and has the same LTO cache key in both sessions, then we can re-use it
+            if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) {
+                let work_product = green_modules[module_name].clone();
+                copy_jobs.push(work_product);
+                info!(" - {}: re-used", module_name);
+                assert!(cgcx.incr_comp_session_dir.is_some());
+                continue;
+            }
+        }*/
+
+        info!(" - {}: re-compiled", module_name);
+        opt_jobs
+            .push(LtoModuleCodegen::Thin(ThinModule { shared: shared.clone(), idx: module_index }));
+    }
+
+    // Save the current ThinLTO import information for the next compilation
+    // session, overwriting the previous serialized data (if any).
+    /*if let Some(path) = key_map_path {
+        if let Err(err) = curr_key_map.save_to_file(&path) {
+            return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err }));
+        }
+    }*/
+
+    // NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead
+    // of now.
+    //module.module_llvm.temp_dir = Some(tmp_path);
+    // TODO: save the directory so that it gets deleted later.
+    std::mem::forget(tmp_path);
+
+    Ok((opt_jobs, copy_jobs))
+}
+
+pub unsafe fn optimize_thin_module(
+    thin_module: ThinModule<GccCodegenBackend>,
+    _cgcx: &CodegenContext<GccCodegenBackend>,
+) -> Result<ModuleCodegen<GccContext>, FatalError> {
+    //let dcx = cgcx.create_dcx();
+
+    //let module_name = &thin_module.shared.module_names[thin_module.idx];
+    /*let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap());
+    let tm = (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(&dcx, e))?;*/
+
+    // Right now the implementation we've got only works over serialized
+    // modules, so we create a fresh new LLVM context and parse the module
+    // into that context. One day, however, we may do this for upstream
+    // crates but for locally codegened modules we may be able to reuse
+    // that LLVM Context and Module.
+    //let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
+    //let llmod_raw = parse_module(llcx, module_name, thin_module.data(), &dcx)? as *const _;
+    let mut should_combine_object_files = false;
+    let context = match thin_module.shared.thin_buffers.get(thin_module.idx) {
+        Some(thin_buffer) => Arc::clone(&thin_buffer.context),
+        None => {
+            let context = Context::default();
+            let len = thin_module.shared.thin_buffers.len();
+            let module = &thin_module.shared.serialized_modules[thin_module.idx - len];
+            match *module {
+                SerializedModule::Local(ref module_buffer) => {
+                    let path = module_buffer.0.to_str().expect("path");
+                    context.add_driver_option(path);
+                    should_combine_object_files = true;
+                    /*module.module_llvm.should_combine_object_files = true;
+                    module
+                        .module_llvm
+                        .context
+                        .add_driver_option(module_buffer.0.to_str().expect("path"));*/
+                }
+                SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
+                SerializedModule::FromUncompressedFile(_) => {
+                    unimplemented!("from uncompressed file")
+                }
+            }
+            Arc::new(SyncContext::new(context))
+        }
+    };
+    let module = ModuleCodegen {
+        module_llvm: GccContext { context, should_combine_object_files, temp_dir: None },
+        name: thin_module.name().to_string(),
+        kind: ModuleKind::Regular,
+    };
+    /*{
+        let target = &*module.module_llvm.tm;
+        let llmod = module.module_llvm.llmod();
+        save_temp_bitcode(cgcx, &module, "thin-lto-input");
+
+        // Up next comes the per-module local analyses that we do for Thin LTO.
+        // Each of these functions is basically copied from the LLVM
+        // implementation and then tailored to suit this implementation. Ideally
+        // each of these would be supported by upstream LLVM but that's perhaps
+        // a patch for another day!
+        //
+        // You can find some more comments about these functions in the LLVM
+        // bindings we've got (currently `PassWrapper.cpp`)
+        {
+            let _timer =
+                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
+            if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) {
+                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
+        }
+
+        {
+            let _timer = cgcx
+                .prof
+                .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
+            if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
+                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
+        }
+
+        {
+            let _timer = cgcx
+                .prof
+                .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
+            if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
+                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
+        }
+
+        {
+            let _timer =
+                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
+            if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
+                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
+        }
+
+        // Alright now that we've done everything related to the ThinLTO
+        // analysis it's time to run some optimizations! Here we use the same
+        // `run_pass_manager` as the "fat" LTO above except that we tell it to
+        // populate a thin-specific pass manager, which presumably LLVM treats a
+        // little differently.
+        {
+            info!("running thin lto passes over {}", module.name);
+            run_pass_manager(cgcx, &dcx, &mut module, true)?;
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
+        }
+    }*/
+    Ok(module)
+}
+
+pub struct ThinBuffer {
+    context: Arc<SyncContext>,
+}
+
+// TODO: check if it makes sense to make ThinBuffer Send and Sync.
+unsafe impl Send for ThinBuffer {}
+unsafe impl Sync for ThinBuffer {}
+
+impl ThinBuffer {
+    pub(crate) fn new(context: &Arc<SyncContext>) -> Self {
+        Self { context: Arc::clone(context) }
+    }
+}
+
+impl ThinBufferMethods for ThinBuffer {
+    fn data(&self) -> &[u8] {
+        &[]
+    }
+
+    fn thin_link_data(&self) -> &[u8] {
+        unimplemented!();
+    }
+}
+
+pub struct ThinData; //(Arc<TempDir>);
+
+fn module_name_to_str(c_str: &CStr) -> &str {
+    c_str.to_str().unwrap_or_else(|e| {
+        bug!("Encountered non-utf8 GCC module name `{}`: {}", c_str.to_string_lossy(), e)
+    })
+}
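
The `Arc<ThinShared>` pattern described in the comments above (one shared blob
of buffers and module names, one `ThinModule` handle per codegen unit) can be
sketched independently of the backend types; the names below are illustrative
stand-ins, not the real rustc types:

```rust
use std::sync::Arc;

// Stand-ins for ThinShared/ThinModule: the shared state is built once,
// cloned cheaply per module, and freed when the last handle is dropped.
struct Shared {
    module_names: Vec<String>,
}

struct Job {
    shared: Arc<Shared>,
    idx: usize,
}

fn main() {
    let shared = Arc::new(Shared {
        module_names: vec!["cgu.0".into(), "cgu.1".into()],
    });
    let jobs: Vec<Job> = (0..shared.module_names.len())
        .map(|idx| Job { shared: Arc::clone(&shared), idx })
        .collect();
    for job in &jobs {
        // Each per-module optimization job sees the whole shared index.
        println!("re-optimize {}", job.shared.module_names[job.idx]);
    }
}
```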
diff --git a/compiler/rustc_codegen_gcc/src/back/write.rs b/compiler/rustc_codegen_gcc/src/back/write.rs
index b9c7f72d0b7..802968979c7 100644
--- a/compiler/rustc_codegen_gcc/src/back/write.rs
+++ b/compiler/rustc_codegen_gcc/src/back/write.rs
@@ -31,6 +31,7 @@ pub(crate) unsafe fn codegen(
 
         // NOTE: Only generate object files with GIMPLE when this environment variable is set for
         // now because this requires a particular setup (same gcc/lto1/lto-wrapper commit as libgccjit).
+        // TODO: remove this environment variable.
         let fat_lto = env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1");
 
         let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
@@ -56,6 +57,8 @@ pub(crate) unsafe fn codegen(
                     .generic_activity_with_arg("GCC_module_codegen_emit_bitcode", &*module.name);
                 context.add_command_line_option("-flto=auto");
                 context.add_command_line_option("-flto-partition=one");
+                // TODO: remove this, since we don't want fat objects when emitting Bitcode only.
+                context.add_command_line_option("-ffat-lto-objects");
                 context
                     .compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str"));
             }
@@ -104,7 +107,7 @@ pub(crate) unsafe fn codegen(
                     // FIXME(antoyo): segfault in dump_reproducer_to_file() might be caused by
                     // transmuting an rvalue to an lvalue.
                     // Segfault is actually in gcc::jit::reproducer::get_identifier_as_lvalue
-                    context.dump_reproducer_to_file(&format!("/tmp/reproducers/{}.c", module.name));
+                    context.dump_reproducer_to_file(format!("/tmp/reproducers/{}.c", module.name));
                     println!("Dumped reproducer {}", module.name);
                 }
                 if env::var("CG_GCCJIT_DUMP_TO_FILE").as_deref() == Ok("1") {
@@ -113,17 +116,20 @@ pub(crate) unsafe fn codegen(
                     context.set_debug_info(true);
                     context.dump_to_file(path, true);
                 }
-                if should_combine_object_files && fat_lto {
-                    context.add_command_line_option("-flto=auto");
-                    context.add_command_line_option("-flto-partition=one");
+                if should_combine_object_files {
+                    if fat_lto {
+                        context.add_command_line_option("-flto=auto");
+                        context.add_command_line_option("-flto-partition=one");
+
+                        // NOTE: without -fuse-linker-plugin, we get the following error:
+                        // lto1: internal compiler error: decompressed stream: Destination buffer is too small
+                        context.add_driver_option("-fuse-linker-plugin");
+                    }
 
                     context.add_driver_option("-Wl,-r");
                     // NOTE: we need -nostdlib, otherwise, we get the following error:
                     // /usr/bin/ld: cannot find -lgcc_s: No such file or directory
                     context.add_driver_option("-nostdlib");
-                    // NOTE: without -fuse-linker-plugin, we get the following error:
-                    // lto1: internal compiler error: decompressed stream: Destination buffer is too small
-                    context.add_driver_option("-fuse-linker-plugin");
 
                     // NOTE: this doesn't actually generate an executable. With the above flags, it combines the .o files into another .o.
                     context.compile_to_file(
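
Taken together, the options added in this hunk amount to a relocatable LTO
link. A condensed sketch of the sequence using the same `gccjit` calls that
appear in the diff (assumes a libgccjit with LTO support; not a drop-in
replacement for the real codegen function):

```rust
use gccjit::{Context, OutputKind};

// Combines several .o files into one relocatable .o, optionally carrying
// fat-LTO information (mirrors the flags added above).
fn combine_object_files(fat_lto: bool, output: &str) {
    let context = Context::default();
    if fat_lto {
        context.add_command_line_option("-flto=auto");
        context.add_command_line_option("-flto-partition=one");
        // Without -fuse-linker-plugin, lto1 fails with "decompressed
        // stream: Destination buffer is too small".
        context.add_driver_option("-fuse-linker-plugin");
    }
    // -Wl,-r requests a relocatable link; -nostdlib avoids pulling in
    // -lgcc_s, which is neither wanted nor found at this stage.
    context.add_driver_option("-Wl,-r");
    context.add_driver_option("-nostdlib");
    context.compile_to_file(OutputKind::ObjectFile, output);
}
```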
diff --git a/compiler/rustc_codegen_gcc/src/base.rs b/compiler/rustc_codegen_gcc/src/base.rs
index 2a2d5741d13..be149ffe5a1 100644
--- a/compiler/rustc_codegen_gcc/src/base.rs
+++ b/compiler/rustc_codegen_gcc/src/base.rs
@@ -1,8 +1,9 @@
 use std::collections::HashSet;
 use std::env;
+use std::sync::Arc;
 use std::time::Instant;
 
-use gccjit::{FunctionType, GlobalKind};
+use gccjit::{CType, FunctionType, GlobalKind};
 use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
 use rustc_codegen_ssa::mono_item::MonoItemExt;
 use rustc_codegen_ssa::traits::DebugInfoMethods;
@@ -18,8 +19,8 @@ use rustc_target::spec::PanicStrategy;
 
 use crate::builder::Builder;
 use crate::context::CodegenCx;
-use crate::GccContext;
 use crate::{gcc_util, new_context, LockedTargetInfo};
+use crate::{GccContext, SyncContext};
 
 #[cfg(feature = "master")]
 pub fn visibility_to_gcc(linkage: Visibility) -> gccjit::Visibility {
@@ -135,7 +136,7 @@ pub fn compile_codegen_unit(
 
         let target_cpu = gcc_util::target_cpu(tcx.sess);
         if target_cpu != "generic" {
-            context.add_command_line_option(&format!("-march={}", target_cpu));
+            context.add_command_line_option(format!("-march={}", target_cpu));
         }
 
         if tcx
@@ -181,7 +182,24 @@ pub fn compile_codegen_unit(
         context.set_allow_unreachable_blocks(true);
 
         {
-            let cx = CodegenCx::new(&context, cgu, tcx, target_info.supports_128bit_int());
+            // TODO: to make this less error-prone, forbid compiling a context after
+            // get_target_info() has been called on it (calling get_target_info() adds
+            // the flag -fsyntax-only).
+            let f16_type_supported = target_info.supports_target_dependent_type(CType::Float16);
+            let f32_type_supported = target_info.supports_target_dependent_type(CType::Float32);
+            let f64_type_supported = target_info.supports_target_dependent_type(CType::Float64);
+            let f128_type_supported = target_info.supports_target_dependent_type(CType::Float128);
+            // TODO: improve this to avoid passing that many arguments.
+            let cx = CodegenCx::new(
+                &context,
+                cgu,
+                tcx,
+                target_info.supports_128bit_int(),
+                f16_type_supported,
+                f32_type_supported,
+                f64_type_supported,
+                f128_type_supported,
+            );
 
             let mono_items = cgu.items_in_deterministic_order(tcx);
             for &(mono_item, data) in &mono_items {
@@ -205,7 +223,11 @@ pub fn compile_codegen_unit(
 
         ModuleCodegen {
             name: cgu_name.to_string(),
-            module_llvm: GccContext { context, should_combine_object_files: false, temp_dir: None },
+            module_llvm: GccContext {
+                context: Arc::new(SyncContext::new(context)),
+                should_combine_object_files: false,
+                temp_dir: None,
+            },
             kind: ModuleKind::Regular,
         }
     }
diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
index 4a3b6f678c4..b7f62fd09d2 100644
--- a/compiler/rustc_codegen_gcc/src/builder.rs
+++ b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -25,7 +25,7 @@ use rustc_middle::ty::layout::{
     FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasParamEnv, HasTyCtxt, LayoutError, LayoutOfHelpers,
     TyAndLayout,
 };
-use rustc_middle::ty::{ParamEnv, Ty, TyCtxt, Instance};
+use rustc_middle::ty::{Instance, ParamEnv, Ty, TyCtxt};
 use rustc_span::def_id::DefId;
 use rustc_span::Span;
 use rustc_target::abi::{
@@ -68,7 +68,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         src: RValue<'gcc>,
         order: AtomicOrdering,
     ) -> RValue<'gcc> {
-        let size = src.get_type().get_size();
+        let size = get_maybe_pointer_size(src);
 
         let func = self.current_func();
 
@@ -138,7 +138,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         failure_order: AtomicOrdering,
         weak: bool,
     ) -> RValue<'gcc> {
-        let size = src.get_type().get_size();
+        let size = get_maybe_pointer_size(src);
         let compare_exchange =
             self.context.get_builtin_function(&format!("__atomic_compare_exchange_{}", size));
         let order = self.context.new_rvalue_from_int(self.i32_type, order.to_gcc());
@@ -153,7 +153,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
         // NOTE: not sure why, but we have the wrong type here.
         let int_type = compare_exchange.get_param(2).to_rvalue().get_type();
-        let src = self.context.new_cast(self.location, src, int_type);
+        let src = self.context.new_bitcast(self.location, src, int_type);
         self.context.new_call(
             self.location,
             compare_exchange,
@@ -190,8 +190,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let casted_args: Vec<_> = param_types
             .into_iter()
             .zip(args.iter())
-            .enumerate()
-            .map(|(_i, (expected_ty, &actual_val))| {
+            .map(|(expected_ty, &actual_val)| {
                 let actual_ty = actual_val.get_type();
                 if expected_ty != actual_ty {
                     self.bitcast(actual_val, expected_ty)
@@ -225,7 +224,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
         let mut on_stack_param_indices = FxHashSet::default();
         if let Some(indices) = self.on_stack_params.borrow().get(&gcc_func) {
-            on_stack_param_indices = indices.clone();
+            on_stack_param_indices.clone_from(indices);
         }
 
         if all_args_match {
@@ -253,11 +252,26 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                     {
                         self.context.new_cast(self.location, actual_val, expected_ty)
                     } else if on_stack_param_indices.contains(&index) {
-                        actual_val.dereference(self.location).to_rvalue()
+                        let ty = actual_val.get_type();
+                        // It's possible that the value behind the pointer is actually not exactly
+                        // the expected type, so to work around that, we add a cast before
+                        // dereferencing the value.
+                        if let Some(pointee_val) = ty.get_pointee()
+                            && pointee_val != expected_ty
+                        {
+                            let new_val = self.context.new_cast(
+                                self.location,
+                                actual_val,
+                                expected_ty.make_pointer(),
+                            );
+                            new_val.dereference(self.location).to_rvalue()
+                        } else {
+                            actual_val.dereference(self.location).to_rvalue()
+                        }
                     } else {
                         assert!(
-                            !((actual_ty.is_vector() && !expected_ty.is_vector())
-                                || (!actual_ty.is_vector() && expected_ty.is_vector())),
+                            (!expected_ty.is_vector() || actual_ty.is_vector())
+                                && (expected_ty.is_vector() || !actual_ty.is_vector()),
                             "{:?} ({}) -> {:?} ({}), index: {:?}[{}]",
                             actual_ty,
                             actual_ty.is_vector(),
@@ -277,8 +291,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             .collect();
 
         // NOTE: to take into account variadic functions.
-        for i in casted_args.len()..args.len() {
-            casted_args.push(args[i]);
+        for arg in args.iter().skip(casted_args.len()) {
+            casted_args.push(*arg);
         }
 
         Cow::Owned(casted_args)
@@ -353,7 +367,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             let function_address_names = self.function_address_names.borrow();
             let original_function_name = function_address_names.get(&func_ptr);
             llvm::adjust_intrinsic_arguments(
-                &self,
+                self,
                 gcc_func,
                 args.into(),
                 &func_name,
@@ -361,7 +375,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             )
         };
         let args_adjusted = args.len() != previous_arg_count;
-        let args = self.check_ptr_call("call", func_ptr, &*args);
+        let args = self.check_ptr_call("call", func_ptr, &args);
 
         // gccjit requires to use the result of functions, even when it's not used.
         // That's why we assign the result to a local or call add_eval().
@@ -373,7 +387,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             unsafe { RETURN_VALUE_COUNT += 1 };
             let return_value = self.cx.context.new_call_through_ptr(self.location, func_ptr, &args);
             let return_value = llvm::adjust_intrinsic_return_value(
-                &self,
+                self,
                 return_value,
                 &func_name,
                 &args,
@@ -441,7 +455,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         self.block.add_assignment(
             self.location,
             result,
-            self.cx.context.new_call(self.location, func, &args),
+            self.cx.context.new_call(self.location, func, args),
         );
         result.to_rvalue()
     }
@@ -596,7 +610,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     ) -> RValue<'gcc> {
         let try_block = self.current_func().new_block("try");
 
-        let current_block = self.block.clone();
+        let current_block = self.block;
         self.block = try_block;
         let call = self.call(typ, fn_attrs, None, func, args, None, instance); // TODO(antoyo): use funclet here?
         self.block = current_block;
@@ -630,8 +644,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         then: Block<'gcc>,
         catch: Block<'gcc>,
         _funclet: Option<&Funclet>,
+        instance: Option<Instance<'tcx>>,
     ) -> RValue<'gcc> {
-        let call_site = self.call(typ, fn_attrs, None, func, args, None);
+        let call_site = self.call(typ, fn_attrs, None, func, args, None, instance);
         let condition = self.context.new_rvalue_from_int(self.bool_type, 1);
         self.llbb().end_with_conditional(self.location, condition, then, catch);
         if let Some(_fn_abi) = fn_abi {
@@ -749,6 +764,24 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
             // FIXME(antoyo): this seems to produce the wrong result.
             return self.context.new_call(self.location, fmodf, &[a, b]);
         }
+
+        #[cfg(feature = "master")]
+        match self.cx.type_kind(a_type) {
+            TypeKind::Half | TypeKind::Float => {
+                let fmodf = self.context.get_builtin_function("fmodf");
+                return self.context.new_call(self.location, fmodf, &[a, b]);
+            }
+            TypeKind::Double => {
+                let fmod = self.context.get_builtin_function("fmod");
+                return self.context.new_call(self.location, fmod, &[a, b]);
+            }
+            TypeKind::FP128 => {
+                let fmodl = self.context.get_builtin_function("fmodl");
+                return self.context.new_call(self.location, fmodl, &[a, b]);
+            }
+            _ => (),
+        }
+
         if let Some(vector_type) = a_type_unqualified.dyncast_vector() {
             assert_eq!(a_type_unqualified, b.get_type().unqualified());
 
@@ -903,11 +936,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         // TODO(antoyo): It might be better to return a LValue, but fixing the rustc API is non-trivial.
         self.stack_var_count.set(self.stack_var_count.get() + 1);
         self.current_func()
-            .new_local(
-                self.location,
-                ty,
-                &format!("stack_var_{}", self.stack_var_count.get()),
-            )
+            .new_local(self.location, ty, &format!("stack_var_{}", self.stack_var_count.get()))
             .get_address(self.location)
     }
 
@@ -993,7 +1022,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
             }
         }
 
-        let val = if let Some(_) = place.val.llextra {
+        let val = if place.val.llextra.is_some() {
             // FIXME: Merge with the `else` below?
             OperandValue::Ref(place.val)
         } else if place.layout.is_gcc_immediate() {
@@ -1125,7 +1154,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         // the following cast is required to avoid this error:
         // gcc_jit_context_new_call: mismatching types for argument 2 of function "__atomic_store_4": assignment to param arg1 (type: int) from loadedValue3577 (type: unsigned int  __attribute__((aligned(4))))
         let int_type = atomic_store.get_param(1).to_rvalue().get_type();
-        let value = self.context.new_cast(self.location, value, int_type);
+        let value = self.context.new_bitcast(self.location, value, int_type);
         self.llbb().add_eval(
             self.location,
             self.context.new_call(self.location, atomic_store, &[ptr, value, ordering]),
@@ -1172,7 +1201,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         // NOTE: due to opaque pointers now being used, we need to cast here.
         let ptr = self.context.new_cast(self.location, ptr, typ.make_pointer());
         // NOTE: array indexing is always considered in bounds in GCC (TODO(antoyo): to be verified).
-        let mut indices = indices.into_iter();
+        let mut indices = indices.iter();
         let index = indices.next().expect("first index in inbounds_gep");
         let mut result = self.context.new_array_access(self.location, ptr, *index);
         for index in indices {
@@ -1589,7 +1618,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         src: RValue<'gcc>,
         order: AtomicOrdering,
     ) -> RValue<'gcc> {
-        let size = src.get_type().get_size();
+        let size = get_maybe_pointer_size(src);
         let name = match op {
             AtomicRmwBinOp::AtomicXchg => format!("__atomic_exchange_{}", size),
             AtomicRmwBinOp::AtomicAdd => format!("__atomic_fetch_add_{}", size),
@@ -1620,7 +1649,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         let dst = self.context.new_cast(self.location, dst, volatile_void_ptr_type);
         // FIXME(antoyo): not sure why, but we have the wrong type here.
         let new_src_type = atomic_function.get_param(1).to_rvalue().get_type();
-        let src = self.context.new_cast(self.location, src, new_src_type);
+        let src = self.context.new_bitcast(self.location, src, new_src_type);
         let res = self.context.new_call(self.location, atomic_function, &[dst, src, order]);
         self.context.new_cast(self.location, res, src.get_type())
     }
@@ -1661,7 +1690,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         _instance: Option<Instance<'tcx>>,
     ) -> RValue<'gcc> {
         // FIXME(antoyo): remove when having a proper API.
-        let gcc_func = unsafe { std::mem::transmute(func) };
+        let gcc_func = unsafe { std::mem::transmute::<RValue<'gcc>, Function<'gcc>>(func) };
         let call = if self.functions.borrow().values().any(|value| *value == gcc_func) {
             self.function_call(func, args, funclet)
         } else {
@@ -1676,11 +1705,6 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
     fn zext(&mut self, value: RValue<'gcc>, dest_typ: Type<'gcc>) -> RValue<'gcc> {
         // FIXME(antoyo): this does not zero-extend.
-        if value.get_type().is_bool() && dest_typ.is_i8(&self.cx) {
-            // FIXME(antoyo): hack because base::from_immediate converts i1 to i8.
-            // Fix the code in codegen_ssa::base::from_immediate.
-            return value;
-        }
         self.gcc_int_cast(value, dest_typ)
     }
 
@@ -2049,7 +2073,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                 self.context.new_rvalue_from_vector(self.location, mask_type, &vector_elements);
             let shifted = self.context.new_rvalue_vector_perm(self.location, res, res, mask);
             shift *= 2;
-            res = op(res, shifted, &self.context);
+            res = op(res, shifted, self.context);
         }
         self.context
             .new_vector_access(self.location, res, self.context.new_rvalue_zero(self.int_type))
@@ -2065,7 +2089,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     }
 
     pub fn vector_reduce_op(&mut self, src: RValue<'gcc>, op: BinaryOp) -> RValue<'gcc> {
-        let loc = self.location.clone();
+        let loc = self.location;
         self.vector_reduce(src, |a, b, context| context.new_binary_op(loc, op, a.get_type(), a, b))
     }
 
@@ -2082,7 +2106,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
         let element_count = vector_type.get_num_units();
         (0..element_count)
-            .into_iter()
             .map(|i| {
                 self.context
                     .new_vector_access(
@@ -2113,7 +2136,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
         let element_count = vector_type.get_num_units();
         (0..element_count)
-            .into_iter()
             .map(|i| {
                 self.context
                     .new_vector_access(
@@ -2133,7 +2155,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
     // Inspired by Hacker's Delight min implementation.
     pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
-        let loc = self.location.clone();
+        let loc = self.location;
         self.vector_reduce(src, |a, b, context| {
             let differences_or_zeros = difference_or_zero(loc, a, b, context);
             context.new_binary_op(loc, BinaryOp::Plus, b.get_type(), b, differences_or_zeros)
@@ -2142,7 +2164,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
     // Inspired by Hacker's Delight max implementation.
     pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
-        let loc = self.location.clone();
+        let loc = self.location;
         self.vector_reduce(src, |a, b, context| {
             let differences_or_zeros = difference_or_zero(loc, a, b, context);
             context.new_binary_op(loc, BinaryOp::Minus, a.get_type(), a, differences_or_zeros)
@@ -2337,7 +2359,13 @@ impl<'tcx> HasParamEnv<'tcx> for Builder<'_, '_, 'tcx> {
 
 impl<'tcx> HasTargetSpec for Builder<'_, '_, 'tcx> {
     fn target_spec(&self) -> &Target {
-        &self.cx.target_spec()
+        self.cx.target_spec()
+    }
+}
+
+impl<'tcx> HasWasmCAbiOpt for Builder<'_, '_, 'tcx> {
+    fn wasm_c_abi_opt(&self) -> WasmCAbi {
+        self.cx.wasm_c_abi_opt()
     }
 }
 
@@ -2422,3 +2450,19 @@ impl ToGccOrdering for AtomicOrdering {
         ordering as i32
     }
 }
+
+// Needed because gcc 12's `get_size()` doesn't work on pointers.
+#[cfg(feature = "master")]
+fn get_maybe_pointer_size(value: RValue<'_>) -> u32 {
+    value.get_type().get_size()
+}
+
+#[cfg(not(feature = "master"))]
+fn get_maybe_pointer_size(value: RValue<'_>) -> u32 {
+    let type_ = value.get_type();
+    if type_.get_pointee().is_some() {
+        std::mem::size_of::<*const ()>() as _
+    } else {
+        type_.get_size()
+    }
+}
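A minimal check of the assumption behind the non-master fallback above: all data pointers on the target share one size, so `size_of::<*const ()>()` can stand in for any pointer type libgccjit cannot measure:

    fn main() {
        assert_eq!(std::mem::size_of::<*const ()>(), std::mem::size_of::<*const u64>());
        println!("pointer size: {} bytes", std::mem::size_of::<*const ()>());
    }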
diff --git a/compiler/rustc_codegen_gcc/src/callee.rs b/compiler/rustc_codegen_gcc/src/callee.rs
index 84f49b6856d..9ad2e90122f 100644
--- a/compiler/rustc_codegen_gcc/src/callee.rs
+++ b/compiler/rustc_codegen_gcc/src/callee.rs
@@ -28,7 +28,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>)
 
     let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty());
 
-    let func = if let Some(_func) = cx.get_declared_value(&sym) {
+    let func = if let Some(_func) = cx.get_declared_value(sym) {
         // FIXME(antoyo): we never reach this because get_declared_value only returns global variables
         // and here we try to get a function.
         unreachable!();
@@ -68,7 +68,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>)
         }*/
     } else {
         cx.linkage.set(FunctionType::Extern);
-        let func = cx.declare_fn(&sym, &fn_abi);
+        let func = cx.declare_fn(sym, fn_abi);
 
         attributes::from_fn_attrs(cx, func, instance);
 
diff --git a/compiler/rustc_codegen_gcc/src/common.rs b/compiler/rustc_codegen_gcc/src/common.rs
index fa8a1ec037c..19333689aaa 100644
--- a/compiler/rustc_codegen_gcc/src/common.rs
+++ b/compiler/rustc_codegen_gcc/src/common.rs
@@ -21,7 +21,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
 
     fn global_string(&self, string: &str) -> LValue<'gcc> {
         // TODO(antoyo): handle non-null-terminated strings.
-        let string = self.context.new_string_literal(&*string);
+        let string = self.context.new_string_literal(string);
         let sym = self.generate_local_symbol_name("str");
         let global = self.declare_private_global(&sym, self.val_ty(string));
         global.global_set_initializer_rvalue(string);
@@ -187,7 +187,8 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
                     return self
                         .context
                         .new_rvalue_from_double(ty, f32::from_bits(data as u32) as f64);
-                } else if ty == self.double_type {
+                }
+                if ty == self.double_type {
                     return self.context.new_rvalue_from_double(ty, f64::from_bits(data as u64));
                 }
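The scalar-float path above builds constants from raw bit patterns. The same round-trip in plain Rust, with an assumed example bit pattern:

    fn main() {
        let bits: u32 = 0x4048_F5C3;                 // an f32 bit pattern (~3.14)
        let as_double = f32::from_bits(bits) as f64; // widened, as for `new_rvalue_from_double`
        assert!((as_double - 3.14).abs() < 1e-6);
    }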
 
@@ -297,7 +298,7 @@ impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> {
         } else if self.is_ulonglong(cx) {
             cx.longlong_type
         } else {
-            self.clone()
+            *self
         }
     }
 
@@ -323,7 +324,7 @@ impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> {
         } else if self.is_longlong(cx) {
             cx.ulonglong_type
         } else {
-            self.clone()
+            *self
         }
     }
 }
@@ -436,7 +437,7 @@ impl<'gcc, 'tcx> TypeReflection<'gcc, 'tcx> for Type<'gcc> {
     }
 
     fn is_vector(&self) -> bool {
-        let mut typ = self.clone();
+        let mut typ = *self;
         loop {
             if typ.dyncast_vector().is_some() {
                 return true;
diff --git a/compiler/rustc_codegen_gcc/src/consts.rs b/compiler/rustc_codegen_gcc/src/consts.rs
index 3d73a60b255..ba7e08e33ef 100644
--- a/compiler/rustc_codegen_gcc/src/consts.rs
+++ b/compiler/rustc_codegen_gcc/src/consts.rs
@@ -1,15 +1,16 @@
 #[cfg(feature = "master")]
 use gccjit::{FnAttribute, VarAttribute, Visibility};
-use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue};
-use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, DerivedTypeMethods, StaticMethods};
+use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue, Type};
+use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, StaticMethods};
+use rustc_hir::def::DefKind;
+use rustc_middle::bug;
 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
 use rustc_middle::mir::interpret::{
     self, read_target_uint, ConstAllocation, ErrorHandled, Scalar as InterpScalar,
 };
-use rustc_middle::mir::mono::MonoItem;
 use rustc_middle::span_bug;
 use rustc_middle::ty::layout::LayoutOf;
-use rustc_middle::ty::{self, Instance, Ty};
+use rustc_middle::ty::{self, Instance};
 use rustc_span::def_id::DefId;
 use rustc_target::abi::{self, Align, HasDataLayout, Primitive, Size, WrappingRange};
 
@@ -63,16 +64,15 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
         global_value
     }
 
+    #[cfg_attr(not(feature = "master"), allow(unused_mut))]
     fn codegen_static(&self, def_id: DefId) {
         let attrs = self.tcx.codegen_fn_attrs(def_id);
 
-        let value = match codegen_static_initializer(&self, def_id) {
-            Ok((value, _)) => value,
+        let Ok((value, alloc)) = codegen_static_initializer(self, def_id) else {
             // Error has already been reported
-            Err(_) => return,
+            return;
         };
-
-        let global = self.get_static(def_id);
+        let alloc = alloc.inner();
 
         // boolean SSA values are i1, but they have to be stored in i8 slots,
         // otherwise some LLVM optimization passes don't work as expected
@@ -81,23 +81,25 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
             unimplemented!();
         };
 
-        let instance = Instance::mono(self.tcx, def_id);
-        let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all());
-        let gcc_type = self.layout_of(ty).gcc_type(self);
+        let is_thread_local = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL);
+        let global = self.get_static_inner(def_id, val_llty);
 
-        set_global_alignment(self, global, self.align_of(ty));
+        #[cfg(feature = "master")]
+        if global.to_rvalue().get_type() != val_llty {
+            global.to_rvalue().set_type(val_llty);
+        }
+        set_global_alignment(self, global, alloc.align);
 
-        let value = self.bitcast_if_needed(value, gcc_type);
         global.global_set_initializer_rvalue(value);
 
         // As an optimization, all shared statics which do not have interior
         // mutability are placed into read-only memory.
-        if !self.tcx.static_mutability(def_id).unwrap().is_mut() && self.type_is_freeze(ty) {
+        if alloc.mutability.is_not() {
             #[cfg(feature = "master")]
             global.global_set_readonly();
         }
 
-        if attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) {
+        if is_thread_local {
             // Do not allow LLVM to change the alignment of a TLS on macOS.
             //
             // By default a global's alignment can be freely increased.
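At the Rust source level, the read-only placement corresponds to the following distinction; this is an illustration, not code from this backend. A static whose final allocation has no interior mutability can live in read-only memory, while one with interior mutability cannot:

    use std::sync::atomic::AtomicI32;
    static PLAIN: i32 = 42;                        // immutable alloc => read-only memory
    static COUNTER: AtomicI32 = AtomicI32::new(0); // interior mutability => writable memory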
@@ -205,35 +207,49 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
 
     pub fn get_static(&self, def_id: DefId) -> LValue<'gcc> {
         let instance = Instance::mono(self.tcx, def_id);
-        let fn_attrs = self.tcx.codegen_fn_attrs(def_id);
+        let DefKind::Static { nested, .. } = self.tcx.def_kind(def_id) else { bug!() };
+        // Nested statics do not have a type, so pick an arbitrary placeholder type and let
+        // `define_static` figure out the gcc type from the actual evaluated initializer.
+        let gcc_type = if nested {
+            self.type_i8()
+        } else {
+            let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all());
+            self.layout_of(ty).gcc_type(self)
+        };
+
+        self.get_static_inner(def_id, gcc_type)
+    }
+
+    pub(crate) fn get_static_inner(&self, def_id: DefId, gcc_type: Type<'gcc>) -> LValue<'gcc> {
+        let instance = Instance::mono(self.tcx, def_id);
         if let Some(&global) = self.instances.borrow().get(&instance) {
+            trace!("used cached value");
             return global;
         }
 
-        let defined_in_current_codegen_unit =
-            self.codegen_unit.items().contains_key(&MonoItem::Static(def_id));
-        assert!(
-            !defined_in_current_codegen_unit,
-            "consts::get_static() should always hit the cache for \
-                 statics defined in the same CGU, but did not for `{:?}`",
-            def_id
-        );
-
-        let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all());
+        // FIXME: Once we stop removing globals in `codegen_static`, we can uncomment this code.
+        // let defined_in_current_codegen_unit =
+        //     self.codegen_unit.items().contains_key(&MonoItem::Static(def_id));
+        // assert!(
+        //     !defined_in_current_codegen_unit,
+        //     "consts::get_static() should always hit the cache for \
+        //          statics defined in the same CGU, but did not for `{:?}`",
+        //     def_id
+        // );
         let sym = self.tcx.symbol_name(instance).name;
+        let fn_attrs = self.tcx.codegen_fn_attrs(def_id);
 
         let global = if def_id.is_local() && !self.tcx.is_foreign_item(def_id) {
-            let llty = self.layout_of(ty).gcc_type(self);
             if let Some(global) = self.get_declared_value(sym) {
-                if self.val_ty(global) != self.type_ptr_to(llty) {
+                if self.val_ty(global) != self.type_ptr_to(gcc_type) {
                     span_bug!(self.tcx.def_span(def_id), "Conflicting types for static");
                 }
             }
 
             let is_tls = fn_attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL);
             let global = self.declare_global(
-                &sym,
-                llty,
+                sym,
+                gcc_type,
                 GlobalKind::Exported,
                 is_tls,
                 fn_attrs.link_section,
@@ -246,7 +262,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
 
             global
         } else {
-            check_and_apply_linkage(&self, &fn_attrs, ty, sym)
+            check_and_apply_linkage(self, fn_attrs, gcc_type, sym)
         };
 
         if !def_id.is_local() {
@@ -360,18 +376,14 @@ fn codegen_static_initializer<'gcc, 'tcx>(
 fn check_and_apply_linkage<'gcc, 'tcx>(
     cx: &CodegenCx<'gcc, 'tcx>,
     attrs: &CodegenFnAttrs,
-    ty: Ty<'tcx>,
+    gcc_type: Type<'gcc>,
     sym: &str,
 ) -> LValue<'gcc> {
     let is_tls = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL);
-    let gcc_type = cx.layout_of(ty).gcc_type(cx);
     if let Some(linkage) = attrs.import_linkage {
         // Declare a symbol `foo` with the desired linkage.
-        let global1 = cx.declare_global_with_linkage(
-            &sym,
-            cx.type_i8(),
-            base::global_linkage_to_gcc(linkage),
-        );
+        let global1 =
+            cx.declare_global_with_linkage(sym, cx.type_i8(), base::global_linkage_to_gcc(linkage));
 
         // Declare an internal global `extern_with_linkage_foo` which
         // is initialized with the address of `foo`.  If `foo` is
@@ -380,7 +392,7 @@ fn check_and_apply_linkage<'gcc, 'tcx>(
         // `extern_with_linkage_foo` will instead be initialized to
         // zero.
         let mut real_name = "_rust_extern_with_linkage_".to_string();
-        real_name.push_str(&sym);
+        real_name.push_str(sym);
         let global2 = cx.define_global(&real_name, gcc_type, is_tls, attrs.link_section);
         // TODO(antoyo): set linkage.
         let value = cx.const_ptrcast(global1.get_address(None), gcc_type);
@@ -397,6 +409,6 @@ fn check_and_apply_linkage<'gcc, 'tcx>(
         // don't do this then linker errors can be generated where the linker
         // complains that one object file has a thread-local version of the
         // symbol and another one doesn't.
-        cx.declare_global(&sym, gcc_type, GlobalKind::Imported, is_tls, attrs.link_section)
+        cx.declare_global(sym, gcc_type, GlobalKind::Imported, is_tls, attrs.link_section)
     }
 }
diff --git a/compiler/rustc_codegen_gcc/src/context.rs b/compiler/rustc_codegen_gcc/src/context.rs
index 1d689c9ac0e..86a5000a723 100644
--- a/compiler/rustc_codegen_gcc/src/context.rs
+++ b/compiler/rustc_codegen_gcc/src/context.rs
@@ -68,6 +68,10 @@ pub struct CodegenCx<'gcc, 'tcx> {
     pub sizet_type: Type<'gcc>,
 
     pub supports_128bit_integers: bool,
+    pub supports_f16_type: bool,
+    pub supports_f32_type: bool,
+    pub supports_f64_type: bool,
+    pub supports_f128_type: bool,
 
     pub float_type: Type<'gcc>,
     pub double_type: Type<'gcc>,
@@ -110,7 +114,7 @@ pub struct CodegenCx<'gcc, 'tcx> {
     local_gen_sym_counter: Cell<usize>,
 
     eh_personality: Cell<Option<RValue<'gcc>>>,
-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
     pub rust_try_fn: Cell<Option<(Type<'gcc>, Function<'gcc>)>>,
 
     pub pointee_infos: RefCell<FxHashMap<(Ty<'tcx>, Size), Option<PointeeInfo>>>,
@@ -122,16 +126,21 @@ pub struct CodegenCx<'gcc, 'tcx> {
     /// FIXME(antoyo): fix the rustc API to avoid having this hack.
     pub structs_as_pointer: RefCell<FxHashSet<RValue<'gcc>>>,
 
-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
     pub cleanup_blocks: RefCell<FxHashSet<Block<'gcc>>>,
 }
 
 impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
+    #[allow(clippy::too_many_arguments)]
     pub fn new(
         context: &'gcc Context<'gcc>,
         codegen_unit: &'tcx CodegenUnit<'tcx>,
         tcx: TyCtxt<'tcx>,
         supports_128bit_integers: bool,
+        supports_f16_type: bool,
+        supports_f32_type: bool,
+        supports_f64_type: bool,
+        supports_f128_type: bool,
     ) -> Self {
         let create_type = |ctype, rust_type| {
             let layout = tcx.layout_of(ParamEnv::reveal_all().and(rust_type)).unwrap();
@@ -304,6 +313,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             sizet_type,
 
             supports_128bit_integers,
+            supports_f16_type,
+            supports_f32_type,
+            supports_f64_type,
+            supports_f128_type,
 
             float_type,
             double_type,
@@ -324,11 +337,11 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             struct_types: Default::default(),
             local_gen_sym_counter: Cell::new(0),
             eh_personality: Cell::new(None),
-            #[cfg(feature="master")]
+            #[cfg(feature = "master")]
             rust_try_fn: Cell::new(None),
             pointee_infos: Default::default(),
             structs_as_pointer: Default::default(),
-            #[cfg(feature="master")]
+            #[cfg(feature = "master")]
             cleanup_blocks: Default::default(),
         };
         // TODO(antoyo): instead of doing this, add SsizeT to libgccjit.
@@ -385,7 +398,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
     }
 
     pub fn sess(&self) -> &'tcx Session {
-        &self.tcx.sess
+        self.tcx.sess
     }
 
     pub fn bitcast_if_needed(
@@ -432,7 +445,9 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         let func_name = self.tcx.symbol_name(instance).name;
 
         let func = if self.intrinsics.borrow().contains_key(func_name) {
-            self.intrinsics.borrow()[func_name].clone()
+            self.intrinsics.borrow()[func_name]
+        } else if let Some(variable) = self.get_declared_value(func_name) {
+            return variable;
         } else {
             get_fn(self, instance)
         };
@@ -485,7 +500,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
                 let symbol_name = tcx.symbol_name(instance).name;
                 let fn_abi = self.fn_abi_of_instance(instance, ty::List::empty());
                 self.linkage.set(FunctionType::Extern);
-                let func = self.declare_fn(symbol_name, &fn_abi);
+                let func = self.declare_fn(symbol_name, fn_abi);
                 let func: RValue<'gcc> = unsafe { std::mem::transmute(func) };
                 func
             }
@@ -496,7 +511,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
                     "rust_eh_personality"
                 };
                 let func = self.declare_func(name, self.type_i32(), &[], true);
-                unsafe { std::mem::transmute(func) }
+                unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) }
             }
         };
         // TODO(antoyo): apply target cpu attributes.
@@ -505,7 +520,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
     }
 
     fn sess(&self) -> &Session {
-        &self.tcx.sess
+        self.tcx.sess
     }
 
     fn codegen_unit(&self) -> &'tcx CodegenUnit<'tcx> {
@@ -522,7 +537,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
 
     fn declare_c_main(&self, fn_type: Self::Type) -> Option<Self::Function> {
         let entry_name = self.sess().target.entry_name.as_ref();
-        if self.get_declared_value(entry_name).is_none() {
+        if !self.functions.borrow().contains_key(entry_name) {
             Some(self.declare_entry_fn(entry_name, fn_type, ()))
         } else {
             // If the symbol already exists, it is an error: for example, the user wrote
@@ -614,7 +629,7 @@ impl<'b, 'tcx> CodegenCx<'b, 'tcx> {
         // user defined names
         let mut name = String::with_capacity(prefix.len() + 6);
         name.push_str(prefix);
-        name.push_str(".");
+        name.push('.');
         name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
         name
     }
diff --git a/compiler/rustc_codegen_gcc/src/debuginfo.rs b/compiler/rustc_codegen_gcc/src/debuginfo.rs
index 1d0a6d9f09b..3d9ea278a63 100644
--- a/compiler/rustc_codegen_gcc/src/debuginfo.rs
+++ b/compiler/rustc_codegen_gcc/src/debuginfo.rs
@@ -90,7 +90,7 @@ fn compute_mir_scopes<'gcc, 'tcx>(
 /// FIXME(tempdragon/?): Add scope support here.
 fn make_mir_scope<'gcc, 'tcx>(
     cx: &CodegenCx<'gcc, 'tcx>,
-    instance: Instance<'tcx>,
+    _instance: Instance<'tcx>,
     mir: &Body<'tcx>,
     variables: &Option<BitSet<SourceScope>>,
     debug_context: &mut FunctionDebugContext<'tcx, (), Location<'gcc>>,
@@ -103,7 +103,7 @@ fn make_mir_scope<'gcc, 'tcx>(
 
     let scope_data = &mir.source_scopes[scope];
     let parent_scope = if let Some(parent) = scope_data.parent_scope {
-        make_mir_scope(cx, instance, mir, variables, debug_context, instantiated, parent);
+        make_mir_scope(cx, _instance, mir, variables, debug_context, instantiated, parent);
         debug_context.scopes[parent]
     } else {
         // The root is the function itself.
@@ -117,7 +117,7 @@ fn make_mir_scope<'gcc, 'tcx>(
         return;
     };
 
-    if let Some(vars) = variables {
+    if let Some(ref vars) = *variables {
         if !vars.contains(scope) && scope_data.inlined.is_none() {
             // Do not create a DIScope if there are no variables defined in this
             // MIR `SourceScope`, and it's not `inlined`, to avoid debuginfo bloat.
@@ -135,8 +135,14 @@ fn make_mir_scope<'gcc, 'tcx>(
     let inlined_at = scope_data.inlined.map(|(_, callsite_span)| {
         // FIXME(eddyb) this doesn't account for the macro-related
         // `Span` fixups that `rustc_codegen_ssa::mir::debuginfo` does.
-        let callsite_scope = parent_scope.adjust_dbg_scope_for_span(cx, callsite_span);
-        cx.dbg_loc(callsite_scope, parent_scope.inlined_at, callsite_span)
+
+        // TODO(tempdragon): Add scope support and then revert to cg_llvm version of this closure
+        // NOTE: these variables are passed `()` here.
+        // Changed to comply with clippy.
+
+        /* let callsite_scope =  */
+        parent_scope.adjust_dbg_scope_for_span(cx, callsite_span);
+        cx.dbg_loc(/* callsite_scope */ (), parent_scope.inlined_at, callsite_span)
     });
     let p_inlined_at = parent_scope.inlined_at;
     // TODO(tempdragon): dbg_scope: Add support for scope extension here.
@@ -224,7 +230,7 @@ impl<'gcc, 'tcx> DebugInfoMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
             file_end_pos: BytePos(0),
         };
         let mut fn_debug_context = FunctionDebugContext {
-            scopes: IndexVec::from_elem(empty_scope, &mir.source_scopes.as_slice()),
+            scopes: IndexVec::from_elem(empty_scope, mir.source_scopes.as_slice()),
             inlined_function_scopes: Default::default(),
         };
 
@@ -273,16 +279,19 @@ impl<'gcc, 'tcx> DebugInfoMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
     ) -> Self::DILocation {
         let pos = span.lo();
         let DebugLoc { file, line, col } = self.lookup_debug_loc(pos);
-        let loc = match &file.name {
-            rustc_span::FileName::Real(name) => match name {
-                rustc_span::RealFileName::LocalPath(name) => {
+        let loc = match file.name {
+            rustc_span::FileName::Real(ref name) => match *name {
+                rustc_span::RealFileName::LocalPath(ref name) => {
                     if let Some(name) = name.to_str() {
                         self.context.new_location(name, line as i32, col as i32)
                     } else {
                         Location::null()
                     }
                 }
-                rustc_span::RealFileName::Remapped { local_path, virtual_name: _ } => {
+                rustc_span::RealFileName::Remapped {
+                    ref local_path,
+                    virtual_name: ref _unused,
+                } => {
                     if let Some(name) = local_path.as_ref() {
                         if let Some(name) = name.to_str() {
                             self.context.new_location(name, line as i32, col as i32)
diff --git a/compiler/rustc_codegen_gcc/src/declare.rs b/compiler/rustc_codegen_gcc/src/declare.rs
index db6edbab12d..a2b158ee0a7 100644
--- a/compiler/rustc_codegen_gcc/src/declare.rs
+++ b/compiler/rustc_codegen_gcc/src/declare.rs
@@ -35,7 +35,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
 
     pub fn declare_unnamed_global(&self, ty: Type<'gcc>) -> LValue<'gcc> {
         let name = self.generate_local_symbol_name("global");
-        self.context.new_global(None, GlobalKind::Internal, ty, &name)
+        self.context.new_global(None, GlobalKind::Internal, ty, name)
     }
 
     pub fn declare_global_with_linkage(
@@ -176,16 +176,14 @@ fn declare_raw_fn<'gcc>(
         cx.functions.borrow()[name]
     } else {
         let params: Vec<_> = param_types
-            .into_iter()
+            .iter()
             .enumerate()
-            .map(|(index, param)| {
-                cx.context.new_parameter(None, *param, &format!("param{}", index))
-            }) // TODO(antoyo): set name.
+            .map(|(index, param)| cx.context.new_parameter(None, *param, format!("param{}", index))) // TODO(antoyo): set name.
             .collect();
         #[cfg(not(feature = "master"))]
-        let name = mangle_name(name);
+        let name = &mangle_name(name);
         let func =
-            cx.context.new_function(None, cx.linkage.get(), return_type, &params, &name, variadic);
+            cx.context.new_function(None, cx.linkage.get(), return_type, &params, name, variadic);
         cx.functions.borrow_mut().insert(name.to_string(), func);
 
         #[cfg(feature = "master")]
@@ -200,10 +198,10 @@ fn declare_raw_fn<'gcc>(
             // create a wrapper function that calls rust_eh_personality.
 
             let params: Vec<_> = param_types
-                .into_iter()
+                .iter()
                 .enumerate()
                 .map(|(index, param)| {
-                    cx.context.new_parameter(None, *param, &format!("param{}", index))
+                    cx.context.new_parameter(None, *param, format!("param{}", index))
                 }) // TODO(antoyo): set name.
                 .collect();
             let gcc_func = cx.context.new_function(
diff --git a/compiler/rustc_codegen_gcc/src/int.rs b/compiler/rustc_codegen_gcc/src/int.rs
index 841bcf592e4..e4c5eb91373 100644
--- a/compiler/rustc_codegen_gcc/src/int.rs
+++ b/compiler/rustc_codegen_gcc/src/int.rs
@@ -2,8 +2,6 @@
 //! This module exists because some integer types are not supported on some gcc platforms, e.g.
 //! 128-bit integers on 32-bit platforms, and thus need to be handled manually.
 
-use std::convert::TryFrom;
-
 use gccjit::{BinaryOp, ComparisonOp, FunctionType, Location, RValue, ToRValue, Type, UnaryOp};
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
 use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, BuilderMethods, OverflowOp};
@@ -40,7 +38,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             self.cx.context.new_unary_op(self.location, operation, typ, a)
         } else {
             let element_type = typ.dyncast_array().expect("element type");
-            self.from_low_high_rvalues(
+            self.concat_low_high_rvalues(
                 typ,
                 self.cx.context.new_unary_op(
                     self.location,
@@ -83,7 +81,19 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                 let b = self.context.new_cast(self.location, b, a_type);
                 a >> b
             } else {
-                a >> b
+                let a_size = a_type.get_size();
+                let b_size = b_type.get_size();
+                match a_size.cmp(&b_size) {
+                    std::cmp::Ordering::Less => {
+                        let a = self.context.new_cast(self.location, a, b_type);
+                        a >> b
+                    }
+                    std::cmp::Ordering::Equal => a >> b,
+                    std::cmp::Ordering::Greater => {
+                        let b = self.context.new_cast(self.location, b, a_type);
+                        a >> b
+                    }
+                }
             }
         } else if a_type.is_vector() && a_type.is_vector() {
             a >> b
@@ -114,7 +124,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             let shift_value = self.gcc_sub(b, sixty_four);
             let high = self.high(a);
             let sign = if a_type.is_signed(self) { high >> sixty_three } else { zero };
-            let array_value = self.from_low_high_rvalues(a_type, high >> shift_value, sign);
+            let array_value = self.concat_low_high_rvalues(a_type, high >> shift_value, sign);
             then_block.add_assignment(self.location, result, array_value);
             then_block.end_with_jump(self.location, after_block);
 
@@ -126,12 +136,15 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
             let shift_value = self.gcc_sub(sixty_four, b);
             // NOTE: cast low to its unsigned type in order to perform a logical right shift.
-            let unsigned_type = native_int_type.to_unsigned(&self.cx);
+            let unsigned_type = native_int_type.to_unsigned(self.cx);
             let casted_low = self.context.new_cast(self.location, self.low(a), unsigned_type);
             let shifted_low = casted_low >> self.context.new_cast(self.location, b, unsigned_type);
             let shifted_low = self.context.new_cast(self.location, shifted_low, native_int_type);
-            let array_value =
-                self.from_low_high_rvalues(a_type, (high << shift_value) | shifted_low, high >> b);
+            let array_value = self.concat_low_high_rvalues(
+                a_type,
+                (high << shift_value) | shifted_low,
+                high >> b,
+            );
             actual_else_block.add_assignment(self.location, result, array_value);
             actual_else_block.end_with_jump(self.location, after_block);
 
@@ -255,10 +268,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     ) -> (<Self as BackendTypes>::Value, <Self as BackendTypes>::Value) {
         use rustc_middle::ty::{Int, IntTy::*, Uint, UintTy::*};
 
-        let new_kind = match typ.kind() {
+        let new_kind = match *typ.kind() {
             Int(t @ Isize) => Int(t.normalize(self.tcx.sess.target.pointer_width)),
             Uint(t @ Usize) => Uint(t.normalize(self.tcx.sess.target.pointer_width)),
-            t @ (Uint(_) | Int(_)) => t.clone(),
+            t @ (Uint(_) | Int(_)) => t,
             _ => panic!("tried to get overflow intrinsic for op applied to non-int type"),
         };
 
@@ -344,7 +357,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             }
         };
 
-        let intrinsic = self.context.get_builtin_function(&name);
+        let intrinsic = self.context.get_builtin_function(name);
         let res = self
             .current_func()
             // TODO(antoyo): is it correct to use rhs type instead of the parameter typ?
@@ -454,7 +467,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             let native_int_type = a_type.dyncast_array().expect("get element type");
             // NOTE: cast low to its unsigned type in order to perform a comparison correctly (e.g.
             // the sign is only on high).
-            let unsigned_type = native_int_type.to_unsigned(&self.cx);
+            let unsigned_type = native_int_type.to_unsigned(self.cx);
 
             let lhs_low = self.context.new_cast(self.location, self.low(lhs), unsigned_type);
             let rhs_low = self.context.new_cast(self.location, self.low(rhs), unsigned_type);
@@ -589,7 +602,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                 | IntPredicate::IntULT
                 | IntPredicate::IntULE => {
                     if !a_type.is_vector() {
-                        let unsigned_type = a_type.to_unsigned(&self.cx);
+                        let unsigned_type = a_type.to_unsigned(self.cx);
                         lhs = self.context.new_cast(self.location, lhs, unsigned_type);
                         rhs = self.context.new_cast(self.location, rhs, unsigned_type);
                     }
@@ -612,7 +625,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         {
             a ^ b
         } else {
-            self.from_low_high_rvalues(
+            self.concat_low_high_rvalues(
                 a_type,
                 self.low(a) ^ self.low(b),
                 self.high(a) ^ self.high(b),
@@ -635,7 +648,19 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                 let b = self.context.new_cast(self.location, b, a_type);
                 a << b
             } else {
-                a << b
+                let a_size = a_type.get_size();
+                let b_size = b_type.get_size();
+                match a_size.cmp(&b_size) {
+                    std::cmp::Ordering::Less => {
+                        let a = self.context.new_cast(self.location, a, b_type);
+                        a << b
+                    }
+                    std::cmp::Ordering::Equal => a << b,
+                    std::cmp::Ordering::Greater => {
+                        let b = self.context.new_cast(self.location, b, a_type);
+                        a << b
+                    }
+                }
             }
         } else if a_type.is_vector() && a_type.is_vector() {
             a << b
@@ -661,7 +686,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             self.llbb().end_with_conditional(self.location, condition, then_block, else_block);
 
             let array_value =
-                self.from_low_high_rvalues(a_type, zero, self.low(a) << (b - sixty_four));
+                self.concat_low_high_rvalues(a_type, zero, self.low(a) << (b - sixty_four));
             then_block.add_assignment(self.location, result, array_value);
             then_block.end_with_jump(self.location, after_block);
 
@@ -673,13 +698,13 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
             // NOTE: cast low to its unsigned type in order to perform a logical right shift.
             // TODO(antoyo): adjust this ^ comment.
-            let unsigned_type = native_int_type.to_unsigned(&self.cx);
+            let unsigned_type = native_int_type.to_unsigned(self.cx);
             let casted_low = self.context.new_cast(self.location, self.low(a), unsigned_type);
             let shift_value = self.context.new_cast(self.location, sixty_four - b, unsigned_type);
             let high_low =
                 self.context.new_cast(self.location, casted_low >> shift_value, native_int_type);
 
-            let array_value = self.from_low_high_rvalues(
+            let array_value = self.concat_low_high_rvalues(
                 a_type,
                 self.low(a) << b,
                 (self.high(a) << b) | high_low,
@@ -708,7 +733,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
             // NOTE: we also need to swap the two elements here, in addition to swapping inside
             // the elements themselves like done above.
-            return self.from_low_high_rvalues(arg_type, swapped_msb, swapped_lsb);
+            return self.concat_low_high_rvalues(arg_type, swapped_msb, swapped_lsb);
         }
 
         // TODO(antoyo): check if it's faster to use string literals and a
@@ -727,10 +752,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
     pub fn gcc_int(&self, typ: Type<'gcc>, int: i64) -> RValue<'gcc> {
         if self.is_native_int_type_or_bool(typ) {
-            self.context.new_rvalue_from_long(typ, i64::try_from(int).expect("i64::try_from"))
+            self.context.new_rvalue_from_long(typ, int)
         } else {
             // NOTE: set the sign in high.
-            self.from_low_high(typ, int, -(int.is_negative() as i64))
+            self.concat_low_high(typ, int, -(int.is_negative() as i64))
         }
     }
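A scalar sketch of the limb split used when the type is not native: the low limb is the value itself and the high limb is its sign extension, that is, 0 or -1:

    fn split_to_limbs(int: i64) -> (i64, i64) {
        let high = -(int.is_negative() as i64); // 0 for non-negative, -1 for negative
        (int, high)
    }
    // split_to_limbs(-2) == (-2, -1): the two 64-bit limbs of -2 as an i128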
 
@@ -740,10 +765,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             let num = self.context.new_rvalue_from_long(self.u64_type, int as i64);
             self.gcc_int_cast(num, typ)
         } else if self.is_native_int_type_or_bool(typ) {
-            self.context
-                .new_rvalue_from_long(typ, u64::try_from(int).expect("u64::try_from") as i64)
+            self.context.new_rvalue_from_long(typ, int as i64)
         } else {
-            self.from_low_high(typ, int as i64, 0)
+            self.concat_low_high(typ, int as i64, 0)
         }
     }
 
@@ -760,7 +784,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
                 let shift = high << sixty_four;
                 shift | self.context.new_cast(None, low, typ)
             } else {
-                self.from_low_high(typ, low as i64, high as i64)
+                self.concat_low_high(typ, low as i64, high as i64)
             }
         } else if typ.is_i128(self) {
             // FIXME(antoyo): libgccjit cannot create 128-bit values yet.
@@ -775,7 +799,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
         if self.is_native_int_type_or_bool(typ) {
             self.context.new_rvalue_zero(typ)
         } else {
-            self.from_low_high(typ, 0, 0)
+            self.concat_low_high(typ, 0, 0)
         }
     }
 
@@ -813,7 +837,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
                 "both types should either be native or non-native for or operation"
             );
             let native_int_type = a_type.dyncast_array().expect("get element type");
-            self.from_low_high_rvalues(
+            self.concat_low_high_rvalues(
                 a_type,
                 self.context.new_binary_op(
                     loc,
@@ -858,7 +882,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             let is_negative =
                 self.context.new_comparison(None, ComparisonOp::LessThan, value, zero);
             let is_negative = self.gcc_int_cast(is_negative, dest_element_type);
-            self.from_low_high_rvalues(
+            self.concat_low_high_rvalues(
                 dest_typ,
                 self.context.new_cast(None, value, dest_element_type),
                 self.context.new_unary_op(None, UnaryOp::Minus, dest_element_type, is_negative),
@@ -926,7 +950,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             return self.context.new_cast(None, value, dest_typ);
         }
 
-        debug_assert!(value_type.dyncast_array().is_some());
+        debug_assert!(dest_typ.dyncast_array().is_some());
         let name_suffix = match self.type_kind(value_type) {
             TypeKind::Float => "sfti",
             TypeKind::Double => "dfti",
@@ -978,7 +1002,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             .to_rvalue()
     }
 
-    fn from_low_high_rvalues(
+    fn concat_low_high_rvalues(
         &self,
         typ: Type<'gcc>,
         low: RValue<'gcc>,
@@ -993,7 +1017,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
         self.context.new_array_constructor(None, typ, &values)
     }
 
-    fn from_low_high(&self, typ: Type<'gcc>, low: i64, high: i64) -> RValue<'gcc> {
+    fn concat_low_high(&self, typ: Type<'gcc>, low: i64, high: i64) -> RValue<'gcc> {
         let (first, last) = match self.sess().target.options.endian {
             Endian::Little => (low, high),
             Endian::Big => (high, low),
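The limb order flips with endianness because the array constructor lays limbs out in memory order: on little-endian targets the low 64 bits of a 128-bit value come first. A quick plain-Rust check of that layout:

    fn main() {
        let v: u128 = 0x0001_0002_0003_0004_0005_0006_0007_0008;
        // `to_le_bytes` puts the low limb's bytes first:
        assert_eq!(&v.to_le_bytes()[..8], &(v as u64).to_le_bytes()[..]);
    }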
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
index c4ae1751fa0..f7500933789 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
@@ -74,6 +74,10 @@ match name {
     "llvm.amdgcn.cvt.sr.bf8.f32" => "__builtin_amdgcn_cvt_sr_bf8_f32",
     "llvm.amdgcn.cvt.sr.fp8.f32" => "__builtin_amdgcn_cvt_sr_fp8_f32",
     "llvm.amdgcn.dispatch.id" => "__builtin_amdgcn_dispatch_id",
+    "llvm.amdgcn.dot4.f32.bf8.bf8" => "__builtin_amdgcn_dot4_f32_bf8_bf8",
+    "llvm.amdgcn.dot4.f32.bf8.fp8" => "__builtin_amdgcn_dot4_f32_bf8_fp8",
+    "llvm.amdgcn.dot4.f32.fp8.bf8" => "__builtin_amdgcn_dot4_f32_fp8_bf8",
+    "llvm.amdgcn.dot4.f32.fp8.fp8" => "__builtin_amdgcn_dot4_f32_fp8_fp8",
     "llvm.amdgcn.ds.add.gs.reg.rtn" => "__builtin_amdgcn_ds_add_gs_reg_rtn",
     "llvm.amdgcn.ds.bpermute" => "__builtin_amdgcn_ds_bpermute",
     "llvm.amdgcn.ds.fadd.v2bf16" => "__builtin_amdgcn_ds_atomic_fadd_v2bf16",
@@ -2291,6 +2295,10 @@ match name {
     "llvm.loongarch.csrxchg.d" => "__builtin_loongarch_csrxchg_d",
     "llvm.loongarch.csrxchg.w" => "__builtin_loongarch_csrxchg_w",
     "llvm.loongarch.dbar" => "__builtin_loongarch_dbar",
+    "llvm.loongarch.frecipe.d" => "__builtin_loongarch_frecipe_d",
+    "llvm.loongarch.frecipe.s" => "__builtin_loongarch_frecipe_s",
+    "llvm.loongarch.frsqrte.d" => "__builtin_loongarch_frsqrte_d",
+    "llvm.loongarch.frsqrte.s" => "__builtin_loongarch_frsqrte_s",
     "llvm.loongarch.ibar" => "__builtin_loongarch_ibar",
     "llvm.loongarch.iocsrrd.b" => "__builtin_loongarch_iocsrrd_b",
     "llvm.loongarch.iocsrrd.d" => "__builtin_loongarch_iocsrrd_d",
@@ -2529,6 +2537,8 @@ match name {
     "llvm.loongarch.lasx.xvfnmsub.s" => "__builtin_lasx_xvfnmsub_s",
     "llvm.loongarch.lasx.xvfrecip.d" => "__builtin_lasx_xvfrecip_d",
     "llvm.loongarch.lasx.xvfrecip.s" => "__builtin_lasx_xvfrecip_s",
+    "llvm.loongarch.lasx.xvfrecipe.d" => "__builtin_lasx_xvfrecipe_d",
+    "llvm.loongarch.lasx.xvfrecipe.s" => "__builtin_lasx_xvfrecipe_s",
     "llvm.loongarch.lasx.xvfrint.d" => "__builtin_lasx_xvfrint_d",
     "llvm.loongarch.lasx.xvfrint.s" => "__builtin_lasx_xvfrint_s",
     "llvm.loongarch.lasx.xvfrintrm.d" => "__builtin_lasx_xvfrintrm_d",
@@ -2541,6 +2551,8 @@ match name {
     "llvm.loongarch.lasx.xvfrintrz.s" => "__builtin_lasx_xvfrintrz_s",
     "llvm.loongarch.lasx.xvfrsqrt.d" => "__builtin_lasx_xvfrsqrt_d",
     "llvm.loongarch.lasx.xvfrsqrt.s" => "__builtin_lasx_xvfrsqrt_s",
+    "llvm.loongarch.lasx.xvfrsqrte.d" => "__builtin_lasx_xvfrsqrte_d",
+    "llvm.loongarch.lasx.xvfrsqrte.s" => "__builtin_lasx_xvfrsqrte_s",
     "llvm.loongarch.lasx.xvfrstp.b" => "__builtin_lasx_xvfrstp_b",
     "llvm.loongarch.lasx.xvfrstp.h" => "__builtin_lasx_xvfrstp_h",
     "llvm.loongarch.lasx.xvfrstpi.b" => "__builtin_lasx_xvfrstpi_b",
@@ -3255,6 +3267,8 @@ match name {
     "llvm.loongarch.lsx.vfnmsub.s" => "__builtin_lsx_vfnmsub_s",
     "llvm.loongarch.lsx.vfrecip.d" => "__builtin_lsx_vfrecip_d",
     "llvm.loongarch.lsx.vfrecip.s" => "__builtin_lsx_vfrecip_s",
+    "llvm.loongarch.lsx.vfrecipe.d" => "__builtin_lsx_vfrecipe_d",
+    "llvm.loongarch.lsx.vfrecipe.s" => "__builtin_lsx_vfrecipe_s",
     "llvm.loongarch.lsx.vfrint.d" => "__builtin_lsx_vfrint_d",
     "llvm.loongarch.lsx.vfrint.s" => "__builtin_lsx_vfrint_s",
     "llvm.loongarch.lsx.vfrintrm.d" => "__builtin_lsx_vfrintrm_d",
@@ -3267,6 +3281,8 @@ match name {
     "llvm.loongarch.lsx.vfrintrz.s" => "__builtin_lsx_vfrintrz_s",
     "llvm.loongarch.lsx.vfrsqrt.d" => "__builtin_lsx_vfrsqrt_d",
     "llvm.loongarch.lsx.vfrsqrt.s" => "__builtin_lsx_vfrsqrt_s",
+    "llvm.loongarch.lsx.vfrsqrte.d" => "__builtin_lsx_vfrsqrte_d",
+    "llvm.loongarch.lsx.vfrsqrte.s" => "__builtin_lsx_vfrsqrte_s",
     "llvm.loongarch.lsx.vfrstp.b" => "__builtin_lsx_vfrstp_b",
     "llvm.loongarch.lsx.vfrstp.h" => "__builtin_lsx_vfrstp_h",
     "llvm.loongarch.lsx.vfrstpi.b" => "__builtin_lsx_vfrstpi_b",
@@ -4434,6 +4450,7 @@ match name {
     "llvm.nvvm.abs.bf16x2" => "__nvvm_abs_bf16x2",
     "llvm.nvvm.abs.i" => "__nvvm_abs_i",
     "llvm.nvvm.abs.ll" => "__nvvm_abs_ll",
+    "llvm.nvvm.activemask" => "__nvvm_activemask",
     "llvm.nvvm.add.rm.d" => "__nvvm_add_rm_d",
     "llvm.nvvm.add.rm.f" => "__nvvm_add_rm_f",
     "llvm.nvvm.add.rm.ftz.f" => "__nvvm_add_rm_ftz_f",
@@ -4522,6 +4539,7 @@ match name {
     "llvm.nvvm.ex2.approx.d" => "__nvvm_ex2_approx_d",
     "llvm.nvvm.ex2.approx.f" => "__nvvm_ex2_approx_f",
     "llvm.nvvm.ex2.approx.ftz.f" => "__nvvm_ex2_approx_ftz_f",
+    "llvm.nvvm.exit" => "__nvvm_exit",
     "llvm.nvvm.f2bf16.rn" => "__nvvm_f2bf16_rn",
     "llvm.nvvm.f2bf16.rn.relu" => "__nvvm_f2bf16_rn_relu",
     "llvm.nvvm.f2bf16.rz" => "__nvvm_f2bf16_rz",
@@ -4722,8 +4740,11 @@ match name {
     "llvm.nvvm.mul24.ui" => "__nvvm_mul24_ui",
     "llvm.nvvm.mulhi.i" => "__nvvm_mulhi_i",
     "llvm.nvvm.mulhi.ll" => "__nvvm_mulhi_ll",
+    "llvm.nvvm.mulhi.s" => "__nvvm_mulhi_s",
     "llvm.nvvm.mulhi.ui" => "__nvvm_mulhi_ui",
     "llvm.nvvm.mulhi.ull" => "__nvvm_mulhi_ull",
+    "llvm.nvvm.mulhi.us" => "__nvvm_mulhi_us",
+    "llvm.nvvm.nanosleep" => "__nvvm_nanosleep",
     "llvm.nvvm.neg.bf16" => "__nvvm_neg_bf16",
     "llvm.nvvm.neg.bf16x2" => "__nvvm_neg_bf16x2",
     "llvm.nvvm.popc.i" => "__nvvm_popc_i",
@@ -4783,6 +4804,7 @@ match name {
     "llvm.nvvm.read.ptx.sreg.envreg7" => "__nvvm_read_ptx_sreg_envreg7",
     "llvm.nvvm.read.ptx.sreg.envreg8" => "__nvvm_read_ptx_sreg_envreg8",
     "llvm.nvvm.read.ptx.sreg.envreg9" => "__nvvm_read_ptx_sreg_envreg9",
+    "llvm.nvvm.read.ptx.sreg.globaltimer" => "__nvvm_read_ptx_sreg_globaltimer",
     "llvm.nvvm.read.ptx.sreg.gridid" => "__nvvm_read_ptx_sreg_gridid",
     // [DUPLICATE]: "llvm.nvvm.read.ptx.sreg.gridid" => "__nvvm_read_ptx_sreg_",
     "llvm.nvvm.read.ptx.sreg.laneid" => "__nvvm_read_ptx_sreg_laneid",
@@ -4835,6 +4857,7 @@ match name {
     "llvm.nvvm.redux.sync.umax" => "__nvvm_redux_sync_umax",
     "llvm.nvvm.redux.sync.umin" => "__nvvm_redux_sync_umin",
     "llvm.nvvm.redux.sync.xor" => "__nvvm_redux_sync_xor",
+    "llvm.nvvm.reflect" => "__nvvm_reflect",
     "llvm.nvvm.rotate.b32" => "__nvvm_rotate_b32",
     "llvm.nvvm.rotate.b64" => "__nvvm_rotate_b64",
     "llvm.nvvm.rotate.right.b64" => "__nvvm_rotate_right_b64",
@@ -4845,7 +4868,11 @@ match name {
     "llvm.nvvm.rsqrt.approx.f" => "__nvvm_rsqrt_approx_f",
     "llvm.nvvm.rsqrt.approx.ftz.f" => "__nvvm_rsqrt_approx_ftz_f",
     "llvm.nvvm.sad.i" => "__nvvm_sad_i",
+    "llvm.nvvm.sad.ll" => "__nvvm_sad_ll",
+    "llvm.nvvm.sad.s" => "__nvvm_sad_s",
     "llvm.nvvm.sad.ui" => "__nvvm_sad_ui",
+    "llvm.nvvm.sad.ull" => "__nvvm_sad_ull",
+    "llvm.nvvm.sad.us" => "__nvvm_sad_us",
     "llvm.nvvm.saturate.d" => "__nvvm_saturate_d",
     "llvm.nvvm.saturate.f" => "__nvvm_saturate_f",
     "llvm.nvvm.saturate.ftz.f" => "__nvvm_saturate_ftz_f",
@@ -5471,6 +5498,7 @@ match name {
     "llvm.ppc.fctiwz" => "__builtin_ppc_fctiwz",
     "llvm.ppc.fctudz" => "__builtin_ppc_fctudz",
     "llvm.ppc.fctuwz" => "__builtin_ppc_fctuwz",
+    "llvm.ppc.fence" => "__builtin_ppc_fence",
     "llvm.ppc.fmaf128.round.to.odd" => "__builtin_fmaf128_round_to_odd",
     "llvm.ppc.fmsub" => "__builtin_ppc_fmsub",
     "llvm.ppc.fmsubs" => "__builtin_ppc_fmsubs",
@@ -5599,6 +5627,9 @@ match name {
     "llvm.ppc.qpx.qvstfs" => "__builtin_qpx_qvstfs",
     "llvm.ppc.qpx.qvstfsa" => "__builtin_qpx_qvstfsa",
     "llvm.ppc.readflm" => "__builtin_readflm",
+    "llvm.ppc.rldimi" => "__builtin_ppc_rldimi",
+    "llvm.ppc.rlwimi" => "__builtin_ppc_rlwimi",
+    "llvm.ppc.rlwnm" => "__builtin_ppc_rlwnm",
     "llvm.ppc.scalar.extract.expq" => "__builtin_vsx_scalar_extract_expq",
     "llvm.ppc.scalar.insert.exp.qp" => "__builtin_vsx_scalar_insert_exp_qp",
     "llvm.ppc.set.texasr" => "__builtin_set_texasr",
@@ -5912,6 +5943,8 @@ match name {
     "llvm.s390.vupllb" => "__builtin_s390_vupllb",
     "llvm.s390.vupllf" => "__builtin_s390_vupllf",
     "llvm.s390.vupllh" => "__builtin_s390_vupllh",
+    // spv
+    "llvm.spv.create.handle" => "__builtin_hlsl_create_handle",
     // ve
     "llvm.ve.vl.andm.MMM" => "__builtin_ve_vl_andm_MMM",
     "llvm.ve.vl.andm.mmm" => "__builtin_ve_vl_andm_mmm",
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
index ce8dee69a98..a1270482219 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
@@ -15,7 +15,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
     // Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
     // arguments here.
     if gcc_func.get_param_count() != args.len() {
-        match &*func_name {
+        match func_name {
             // NOTE: the following intrinsics have a different number of parameters in LLVM and GCC.
             "__builtin_ia32_prold512_mask"
             | "__builtin_ia32_pmuldq512_mask"
@@ -380,7 +380,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
             _ => (),
         }
     } else {
-        match &*func_name {
+        match func_name {
             "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => {
                 let new_args = args.to_vec();
                 let arg3_type = gcc_func.get_param_type(2);
@@ -629,17 +629,22 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
 
 #[cfg(feature = "master")]
 pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
-    match name {
+    let gcc_name = match name {
         "llvm.prefetch" => {
             let gcc_name = "__builtin_prefetch";
             let func = cx.context.get_builtin_function(gcc_name);
             cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
             return func;
         }
-        _ => (),
-    }
 
-    let gcc_name = match name {
+        "llvm.aarch64.isb" => {
+            // FIXME: GCC doesn't support __builtin_arm_isb yet, check if this builtin is OK.
+            let gcc_name = "__atomic_thread_fence";
+            let func = cx.context.get_builtin_function(gcc_name);
+            cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
+            return func;
+        }
+
         "llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
         // NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
         "llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index 43f12b514af..9352a67e362 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -91,7 +91,7 @@ fn get_simple_intrinsic<'gcc, 'tcx>(
         sym::abort => "abort",
         _ => return None,
     };
-    Some(cx.context.get_builtin_function(&gcc_name))
+    Some(cx.context.get_builtin_function(gcc_name))
 }
 
 impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
@@ -122,10 +122,17 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
         let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
 
         let simple = get_simple_intrinsic(self, name);
+
+        // FIXME(tempdragon): Re-enable `clippy::suspicious_else_formatting` once the following issue is resolved:
+        // https://github.com/rust-lang/rust-clippy/issues/12497
+        // and leave the `else if use_integer_compare` block formatted as-is.
+        #[allow(clippy::suspicious_else_formatting)]
         let llval = match name {
             _ if simple.is_some() => {
                 // FIXME(antoyo): remove this cast when the API supports function.
-                let func = unsafe { std::mem::transmute(simple.expect("simple")) };
+                let func = unsafe {
+                    std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(simple.expect("simple"))
+                };
                 self.call(
                     self.type_void(),
                     None,
@@ -167,7 +174,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
             sym::volatile_load | sym::unaligned_volatile_load => {
                 let tp_ty = fn_args.type_at(0);
                 let ptr = args[0].immediate();
-                let load = if let PassMode::Cast { cast: ty, pad_i32: _ } = &fn_abi.ret.mode {
+                let load = if let PassMode::Cast { cast: ref ty, pad_i32: _ } = fn_abi.ret.mode {
                     let gcc_ty = ty.gcc_type(self);
                     self.volatile_load(gcc_ty, ptr)
                 } else {
@@ -213,12 +220,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                             let after_block = func.new_block("after");
 
                             let arg = args[0].immediate();
-                            let result = func.new_local(None, arg.get_type(), "zeros");
+                            let result = func.new_local(None, self.u32_type, "zeros");
                             let zero = self.cx.gcc_zero(arg.get_type());
                             let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
                             self.llbb().end_with_conditional(None, cond, then_block, else_block);
 
-                            let zero_result = self.cx.gcc_uint(arg.get_type(), width);
+                            let zero_result = self.cx.gcc_uint(self.u32_type, width);
                             then_block.add_assignment(None, result, zero_result);
                             then_block.end_with_jump(None, after_block);
 
@@ -386,7 +393,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
         };
 
         if !fn_abi.ret.is_ignore() {
-            if let PassMode::Cast { cast: ty, .. } = &fn_abi.ret.mode {
+            if let PassMode::Cast { cast: ref ty, .. } = fn_abi.ret.mode {
                 let ptr_llty = self.type_ptr_to(ty.gcc_type(self));
                 let ptr = self.pointercast(result.val.llval, ptr_llty);
                 self.store(llval, ptr, result.val.align);
@@ -592,7 +599,7 @@ fn int_type_width_signed<'gcc, 'tcx>(
     ty: Ty<'tcx>,
     cx: &CodegenCx<'gcc, 'tcx>,
 ) -> Option<(u64, bool)> {
-    match ty.kind() {
+    match *ty.kind() {
         ty::Int(t) => Some((
             match t {
                 rustc_middle::ty::IntTy::Isize => u64::from(cx.tcx.sess.target.pointer_width),
@@ -698,16 +705,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
         // TODO(antoyo): use width?
         let arg_type = arg.get_type();
+        let result_type = self.u32_type;
         let count_leading_zeroes =
             // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
             // instead of using is_uint().
-            if arg_type.is_uint(&self.cx) {
+            if arg_type.is_uint(self.cx) {
                 "__builtin_clz"
             }
-            else if arg_type.is_ulong(&self.cx) {
+            else if arg_type.is_ulong(self.cx) {
                 "__builtin_clzl"
             }
-            else if arg_type.is_ulonglong(&self.cx) {
+            else if arg_type.is_ulonglong(self.cx) {
                 "__builtin_clzll"
             }
             else if width == 128 {
@@ -755,7 +763,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
                 let res = self.context.new_array_access(self.location, result, index);
 
-                return self.gcc_int_cast(res.to_rvalue(), arg_type);
+                return self.gcc_int_cast(res.to_rvalue(), result_type);
             }
             else {
                 let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");
@@ -763,17 +771,18 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                 let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64;
                 let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8);
                 let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff;
-                return self.context.new_cast(self.location, res, arg_type);
+                return self.context.new_cast(self.location, res, result_type);
             };
         let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes);
         let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]);
-        self.context.new_cast(self.location, res, arg_type)
+        self.context.new_cast(self.location, res, result_type)
     }
 
     fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
-        let result_type = arg.get_type();
-        let arg = if result_type.is_signed(self.cx) {
-            let new_type = result_type.to_unsigned(self.cx);
+        let arg_type = arg.get_type();
+        let result_type = self.u32_type;
+        let arg = if arg_type.is_signed(self.cx) {
+            let new_type = arg_type.to_unsigned(self.cx);
             self.gcc_int_cast(arg, new_type)
         } else {
             arg
@@ -782,17 +791,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let (count_trailing_zeroes, expected_type) =
             // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
             // instead of using is_uint().
-            if arg_type.is_uchar(&self.cx) || arg_type.is_ushort(&self.cx) || arg_type.is_uint(&self.cx) {
+            if arg_type.is_uchar(self.cx) || arg_type.is_ushort(self.cx) || arg_type.is_uint(self.cx) {
                 // NOTE: we don't need to & 0xFF for uchar because the result is undefined on zero.
                 ("__builtin_ctz", self.cx.uint_type)
             }
-            else if arg_type.is_ulong(&self.cx) {
+            else if arg_type.is_ulong(self.cx) {
                 ("__builtin_ctzl", self.cx.ulong_type)
             }
-            else if arg_type.is_ulonglong(&self.cx) {
+            else if arg_type.is_ulonglong(self.cx) {
                 ("__builtin_ctzll", self.cx.ulonglong_type)
             }
-            else if arg_type.is_u128(&self.cx) {
+            else if arg_type.is_u128(self.cx) {
                 // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
                 let array_type = self.context.new_array_type(None, arg_type, 3);
                 let result = self.current_func()
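Switching the result to a fixed `u32` in both functions above matches the Rust intrinsics being lowered: `leading_zeros` and `trailing_zeros` return `u32` regardless of the integer width:

    fn main() {
        let x: u64 = 1 << 40;
        let lz: u32 = x.leading_zeros();  // 23
        let tz: u32 = x.trailing_zeros(); // 40
        assert_eq!((lz, tz), (23, 40));
    }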
@@ -863,18 +872,16 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
     fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> {
         // TODO(antoyo): use the optimized version with fewer operations.
-        let result_type = value.get_type();
-        let value_type = result_type.to_unsigned(self.cx);
+        let result_type = self.u32_type;
+        let arg_type = value.get_type();
+        let value_type = arg_type.to_unsigned(self.cx);
 
-        let value = if result_type.is_signed(self.cx) {
-            self.gcc_int_cast(value, value_type)
-        } else {
-            value
-        };
+        let value =
+            if arg_type.is_signed(self.cx) { self.gcc_int_cast(value, value_type) } else { value };
 
         // only break apart 128-bit ints if they're not natively supported
         // TODO(antoyo): remove this if/when native 128-bit integers land in libgccjit
-        if value_type.is_u128(&self.cx) && !self.cx.supports_128bit_integers {
+        if value_type.is_u128(self.cx) && !self.cx.supports_128bit_integers {
             let sixty_four = self.gcc_int(value_type, 64);
             let right_shift = self.gcc_lshr(value, sixty_four);
             let high = self.gcc_int_cast(right_shift, self.cx.ulonglong_type);
@@ -997,7 +1004,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
             // Return `result_type`'s maximum or minimum value on overflow
             // NOTE: convert the type to unsigned to have an unsigned shift.
-            let unsigned_type = result_type.to_unsigned(&self.cx);
+            let unsigned_type = result_type.to_unsigned(self.cx);
             let shifted = self.gcc_lshr(
                 self.gcc_int_cast(lhs, unsigned_type),
                 self.gcc_int(unsigned_type, width as i64 - 1),
@@ -1189,7 +1196,7 @@ fn codegen_gnu_try<'gcc>(
         bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None, None);
     });
 
-    let func = unsafe { std::mem::transmute(func) };
+    let func = unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) };
 
     // Note that no invoke is used here because by definition this function
     // can't panic (that's what it's catching).
@@ -1263,7 +1270,7 @@ fn gen_fn<'a, 'gcc, 'tcx>(
     // FIXME(eddyb) find a nicer way to do this.
     cx.linkage.set(FunctionType::Internal);
     let func = cx.declare_fn(name, fn_abi);
-    let func_val = unsafe { std::mem::transmute(func) };
+    let func_val = unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) };
     cx.set_frame_pointer_type(func_val);
     cx.apply_target_cpu_attr(func_val);
     let block = Builder::append_block(cx, func_val, "entry-block");
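The pop_count hunk above now returns u32 (matching rustc's ctpop intrinsic) and, when libgccjit lacks native 128-bit integers, splits the operand into two 64-bit halves and sums the per-half counts. A minimal plain-Rust model of that fallback; the real code builds gccjit RValues rather than operating on Rust integers:

```rust
// Sketch of the 128-bit popcount fallback: count each 64-bit half
// natively and sum the results. The u32 result mirrors the ctpop
// intrinsic's signature.
fn pop_count_u128_via_u64(value: u128) -> u32 {
    let high = (value >> 64) as u64; // upper 64 bits
    let low = value as u64;          // lower 64 bits
    high.count_ones() + low.count_ones()
}

fn main() {
    assert_eq!(pop_count_u128_via_u64(0), 0);
    assert_eq!(pop_count_u128_via_u64(u128::MAX), 128);
    assert_eq!(pop_count_u128_via_u64(1 << 100), 1);
}
```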
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
index 1625e6ecdb7..ba214a9c24c 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -13,6 +13,7 @@ use rustc_codegen_ssa::errors::InvalidMonomorphization;
 use rustc_codegen_ssa::mir::operand::OperandRef;
 use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods};
+#[cfg(feature = "master")]
 use rustc_hir as hir;
 use rustc_middle::mir::BinOp;
 use rustc_middle::span_bug;
@@ -72,11 +73,11 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         let expected_bytes = len / 8 + ((len % 8 > 0) as u64);
 
         let mask_ty = arg_tys[0];
-        let mut mask = match mask_ty.kind() {
+        let mut mask = match *mask_ty.kind() {
             ty::Int(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
             ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
             ty::Array(elem, len)
-                if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
+                if matches!(*elem.kind(), ty::Uint(ty::UintTy::U8))
                     && len.try_eval_target_usize(bx.tcx, ty::ParamEnv::reveal_all())
                         == Some(expected_bytes) =>
             {
@@ -309,10 +310,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 })
                 .collect();
             return Ok(bx.context.new_rvalue_from_vector(None, v_type, &elems));
-        } else {
-            // avoid the unnecessary truncation as an optimization.
-            return Ok(bx.context.new_bitcast(None, result, v_type));
         }
+        // avoid the unnecessary truncation as an optimization.
+        return Ok(bx.context.new_bitcast(None, result, v_type));
     }
     // since gcc doesn't have vector shuffle methods available in non-patched builds, fallback to
     // component-wise bitreverses if they're not available.
@@ -342,11 +342,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             .map(|i| {
                 let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
                 let value = bx.extract_element(vector, index).to_rvalue();
-                if name == sym::simd_ctlz {
-                    bx.count_leading_zeroes(value.get_type().get_size() as u64 * 8, value)
+                let value_type = value.get_type();
+                let element = if name == sym::simd_ctlz {
+                    bx.count_leading_zeroes(value_type.get_size() as u64 * 8, value)
                 } else {
-                    bx.count_trailing_zeroes(value.get_type().get_size() as u64 * 8, value)
-                }
+                    bx.count_trailing_zeroes(value_type.get_size() as u64 * 8, value)
+                };
+                bx.context.new_cast(None, element, value_type)
             })
             .collect();
         return Ok(bx.context.new_rvalue_from_vector(None, vector.get_type(), &elements));
@@ -355,8 +357,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     if name == sym::simd_shuffle {
         // Make sure this is actually an array, since typeck only checks the length-suffixed
         // version of this intrinsic.
-        let n: u64 = match args[2].layout.ty.kind() {
-            ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
+        let n: u64 = match *args[2].layout.ty.kind() {
+            ty::Array(ty, len) if matches!(*ty.kind(), ty::Uint(ty::UintTy::U32)) => {
                 len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(
                     || span_bug!(span, "could not evaluate shuffle index array length"),
                 )
@@ -429,13 +431,148 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             m_len == v_len,
             InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
         );
-        match m_elem_ty.kind() {
+        match *m_elem_ty.kind() {
             ty::Int(_) => {}
             _ => return_error!(InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }),
         }
         return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
     }
 
+    if name == sym::simd_cast_ptr {
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
+        let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
+
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
+
+        match *in_elem.kind() {
+            ty::RawPtr(p_ty, _) => {
+                let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
+                    bx.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), ty)
+                });
+                require!(
+                    metadata.is_unit(),
+                    InvalidMonomorphization::CastFatPointer { span, name, ty: in_elem }
+                );
+            }
+            _ => {
+                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
+            }
+        }
+        match *out_elem.kind() {
+            ty::RawPtr(p_ty, _) => {
+                let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
+                    bx.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), ty)
+                });
+                require!(
+                    metadata.is_unit(),
+                    InvalidMonomorphization::CastFatPointer { span, name, ty: out_elem }
+                );
+            }
+            _ => {
+                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
+            }
+        }
+
+        let arg = args[0].immediate();
+        let elem_type = llret_ty.dyncast_vector().expect("vector return type").get_element_type();
+        let values: Vec<_> = (0..in_len)
+            .map(|i| {
+                let idx = bx.gcc_int(bx.usize_type, i as _);
+                let value = bx.extract_element(arg, idx);
+                bx.pointercast(value, elem_type)
+            })
+            .collect();
+        return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values));
+    }
+
+    if name == sym::simd_expose_provenance {
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
+        let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
+
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
+
+        match *in_elem.kind() {
+            ty::RawPtr(_, _) => {}
+            _ => {
+                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
+            }
+        }
+        match *out_elem.kind() {
+            ty::Uint(ty::UintTy::Usize) => {}
+            _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: out_elem }),
+        }
+
+        let arg = args[0].immediate();
+        let elem_type = llret_ty.dyncast_vector().expect("vector return type").get_element_type();
+        let values: Vec<_> = (0..in_len)
+            .map(|i| {
+                let idx = bx.gcc_int(bx.usize_type, i as _);
+                let value = bx.extract_element(arg, idx);
+                bx.ptrtoint(value, elem_type)
+            })
+            .collect();
+        return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values));
+    }
+
+    if name == sym::simd_with_exposed_provenance {
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
+        let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
+
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
+
+        match *in_elem.kind() {
+            ty::Uint(ty::UintTy::Usize) => {}
+            _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: in_elem }),
+        }
+        match *out_elem.kind() {
+            ty::RawPtr(_, _) => {}
+            _ => {
+                return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
+            }
+        }
+
+        let arg = args[0].immediate();
+        let elem_type = llret_ty.dyncast_vector().expect("vector return type").get_element_type();
+        let values: Vec<_> = (0..in_len)
+            .map(|i| {
+                let idx = bx.gcc_int(bx.usize_type, i as _);
+                let value = bx.extract_element(arg, idx);
+                bx.inttoptr(value, elem_type)
+            })
+            .collect();
+        return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values));
+    }
+
     #[cfg(feature = "master")]
     if name == sym::simd_cast || name == sym::simd_as {
         require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
@@ -462,13 +599,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             Unsupported,
         }
 
-        let in_style = match in_elem.kind() {
+        let in_style = match *in_elem.kind() {
             ty::Int(_) | ty::Uint(_) => Style::Int,
             ty::Float(_) => Style::Float,
             _ => Style::Unsupported,
         };
 
-        let out_style = match out_elem.kind() {
+        let out_style = match *out_elem.kind() {
             ty::Int(_) | ty::Uint(_) => Style::Int,
             ty::Float(_) => Style::Float,
             _ => Style::Unsupported,
@@ -495,7 +632,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     macro_rules! arith_binary {
         ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
             $(if name == sym::$name {
-                match in_elem.kind() {
+                match *in_elem.kind() {
                     $($(ty::$p(_))|* => {
                         return Ok(bx.$call(args[0].immediate(), args[1].immediate()))
                     })*
@@ -533,7 +670,6 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         let sign_shift = bx.context.new_rvalue_from_int(elem_type, elem_size as i32 - 1);
         let one = bx.context.new_rvalue_one(elem_type);
 
-        let mut shift = 0;
         for i in 0..in_len {
             let elem =
                 bx.extract_element(vector, bx.context.new_rvalue_from_int(bx.int_type, i as i32));
@@ -541,17 +677,16 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             let masked = shifted & one;
             result = result
                 | (bx.context.new_cast(None, masked, result_type)
-                    << bx.context.new_rvalue_from_int(result_type, shift));
-            shift += 1;
+                    << bx.context.new_rvalue_from_int(result_type, i as i32));
         }
 
-        match ret_ty.kind() {
+        match *ret_ty.kind() {
             ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {
                 // Zero-extend iN to the bitmask type:
                 return Ok(result);
             }
             ty::Array(elem, len)
-                if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
+                if matches!(*elem.kind(), ty::Uint(ty::UintTy::U8))
                     && len.try_eval_target_usize(bx.tcx, ty::ParamEnv::reveal_all())
                         == Some(expected_bytes) =>
             {
@@ -590,7 +725,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 return Err(());
             }};
         }
-        let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() {
+        let (elem_ty_str, elem_ty) = if let ty::Float(ref f) = *in_elem.kind() {
             let elem_ty = bx.cx.type_float_from_ty(*f);
             match f.bit_width() {
                 32 => ("f", elem_ty),
@@ -816,7 +951,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         let (_, element_ty0) = arg_tys[0].simd_size_and_type(bx.tcx());
         let (_, element_ty1) = arg_tys[1].simd_size_and_type(bx.tcx());
         let (pointer_count, underlying_ty) = match *element_ty1.kind() {
-            ty::RawPtr(p_ty, _) if p_ty == in_elem => (ptr_count(element_ty1), non_ptr(element_ty1)),
+            ty::RawPtr(p_ty, _) if p_ty == in_elem => {
+                (ptr_count(element_ty1), non_ptr(element_ty1))
+            }
             _ => {
                 require!(
                     false,
@@ -839,7 +976,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
         // The element type of the third argument must be a signed integer type of any width:
         let (_, element_ty2) = arg_tys[2].simd_size_and_type(bx.tcx());
-        match element_ty2.kind() {
+        match *element_ty2.kind() {
             ty::Int(_) => (),
             _ => {
                 require!(
@@ -955,7 +1092,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         assert_eq!(underlying_ty, non_ptr(element_ty0));
 
         // The element type of the third argument must be a signed integer type of any width:
-        match element_ty2.kind() {
+        match *element_ty2.kind() {
             ty::Int(_) => (),
             _ => {
                 require!(
@@ -1013,7 +1150,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     macro_rules! arith_unary {
         ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
             $(if name == sym::$name {
-                match in_elem.kind() {
+                match *in_elem.kind() {
                     $($(ty::$p(_))|* => {
                         return Ok(bx.$call(args[0].immediate()))
                     })*
@@ -1137,7 +1274,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                     ret_ty == in_elem,
                     InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                 );
-                return match in_elem.kind() {
+                return match *in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
                         let r = bx.vector_reduce_op(args[0].immediate(), $vec_op);
                         if $ordered {
@@ -1206,7 +1343,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                     ret_ty == in_elem,
                     InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                 );
-                return match in_elem.kind() {
+                return match *in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => Ok(bx.$int_red(args[0].immediate())),
                     ty::Float(_) => Ok(bx.$float_red(args[0].immediate())),
                     _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
@@ -1235,7 +1372,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                     );
                     args[0].immediate()
                 } else {
-                    match in_elem.kind() {
+                    match *in_elem.kind() {
                         ty::Int(_) | ty::Uint(_) => {}
                         _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                             span,
@@ -1249,7 +1386,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
                     args[0].immediate()
                 };
-                return match in_elem.kind() {
+                return match *in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
                         let r = bx.vector_reduce_op(input, $op);
                         Ok(if !$boolean {
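The three new pointer intrinsics added to simd.rs above (simd_cast_ptr, simd_expose_provenance, simd_with_exposed_provenance) share one lane-wise shape: extract each element with extract_element, apply the scalar conversion (pointercast, ptrtoint, or inttoptr), and rebuild the vector with new_rvalue_from_vector. A hedged scalar model of the provenance pair, assuming the standard strict-provenance pointer APIs (expose_provenance / with_exposed_provenance); the backend emits the equivalent casts through gccjit instead:

```rust
use std::ptr;

// Lane-wise model of simd_expose_provenance: each pointer lane becomes a
// usize lane carrying its exposed address.
fn expose_lanes(ptrs: &[*const u8]) -> Vec<usize> {
    ptrs.iter().map(|p| p.expose_provenance()).collect()
}

// Lane-wise model of simd_with_exposed_provenance: each usize lane is
// turned back into a pointer with previously exposed provenance.
fn with_exposed_lanes(addrs: &[usize]) -> Vec<*const u8> {
    addrs.iter().map(|&a| ptr::with_exposed_provenance(a)).collect()
}

fn main() {
    let data = [1u8, 2, 3];
    let ptrs: Vec<*const u8> = data.iter().map(|b| b as *const u8).collect();
    let back = with_exposed_lanes(&expose_lanes(&ptrs));
    assert_eq!(unsafe { *back[2] }, 3);
}
```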
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index 24856506c46..1132b0cd2f5 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -4,7 +4,7 @@
  * TODO(antoyo): support LTO (gcc's equivalent to Full LTO is -flto -flto-partition=one — https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html).
  * For Thin LTO, this might be helpful:
  * In gcc 4.6 -fwhopr was removed and became default with -flto. The non-whopr path can still be executed via -flto-partition=none.
- * Or the new incremental LTO?
+ * Or the new incremental LTO (https://www.phoronix.com/news/GCC-Incremental-LTO-Patches)?
  *
 * Maybe some missing optimizations enabled by rustc's LTO are in there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
 * Like -fipa-icf (should already be enabled) and maybe -fdevirtualize-at-ltrans.
@@ -16,12 +16,13 @@
 #![allow(internal_features)]
 #![doc(rust_logo)]
 #![feature(rustdoc_internals)]
-#![feature(rustc_private, decl_macro, never_type, trusted_len, hash_raw_entry)]
+#![feature(rustc_private, decl_macro, never_type, trusted_len, hash_raw_entry, let_chains)]
 #![allow(broken_intra_doc_links)]
 #![recursion_limit = "256"]
 #![warn(rust_2018_idioms)]
 #![warn(unused_lifetimes)]
 #![deny(clippy::pattern_type_mismatch)]
+#![allow(clippy::needless_lifetimes)]
 
 extern crate rustc_apfloat;
 extern crate rustc_ast;
@@ -73,6 +74,7 @@ mod type_of;
 
 use std::any::Any;
 use std::fmt::Debug;
+use std::ops::Deref;
 #[cfg(not(feature = "master"))]
 use std::sync::atomic::AtomicBool;
 #[cfg(not(feature = "master"))]
@@ -80,8 +82,9 @@ use std::sync::atomic::Ordering;
 use std::sync::Arc;
 use std::sync::Mutex;
 
+use back::lto::ThinBuffer;
+use back::lto::ThinData;
 use errors::LTONotSupported;
-#[cfg(not(feature = "master"))]
 use gccjit::CType;
 use gccjit::{Context, OptimizationLevel};
 #[cfg(feature = "master")]
@@ -92,9 +95,7 @@ use rustc_codegen_ssa::back::write::{
     CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn,
 };
 use rustc_codegen_ssa::base::codegen_crate;
-use rustc_codegen_ssa::traits::{
-    CodegenBackend, ExtraBackendMethods, ThinBufferMethods, WriteBackendMethods,
-};
+use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, WriteBackendMethods};
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
 use rustc_data_structures::fx::FxIndexMap;
 use rustc_data_structures::sync::IntoDynSyncSend;
@@ -139,6 +140,10 @@ impl TargetInfo {
     fn supports_128bit_int(&self) -> bool {
         self.supports_128bit_integers.load(Ordering::SeqCst)
     }
+
+    fn supports_target_dependent_type(&self, _typ: CType) -> bool {
+        false
+    }
 }
 
 #[derive(Clone)]
@@ -160,6 +165,10 @@ impl LockedTargetInfo {
     fn supports_128bit_int(&self) -> bool {
         self.info.lock().expect("lock").supports_128bit_int()
     }
+
+    fn supports_target_dependent_type(&self, typ: CType) -> bool {
+        self.info.lock().expect("lock").supports_target_dependent_type(typ)
+    }
 }
 
 #[derive(Clone)]
@@ -188,6 +197,7 @@ impl CodegenBackend for GccCodegenBackend {
 
         #[cfg(feature = "master")]
         gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
+
         if sess.lto() == Lto::Thin {
             sess.dcx().emit_warn(LTONotSupported {});
         }
@@ -293,7 +303,7 @@ impl ExtraBackendMethods for GccCodegenBackend {
         alloc_error_handler_kind: AllocatorKind,
     ) -> Self::Module {
         let mut mods = GccContext {
-            context: new_context(tcx),
+            context: Arc::new(SyncContext::new(new_context(tcx))),
             should_combine_object_files: false,
             temp_dir: None,
         };
@@ -323,35 +333,42 @@ impl ExtraBackendMethods for GccCodegenBackend {
     }
 }
 
-pub struct ThinBuffer;
+pub struct GccContext {
+    context: Arc<SyncContext>,
+    should_combine_object_files: bool,
+    // Temporary directory used by LTO. We keep it here so that it's not removed before linking.
+    temp_dir: Option<TempDir>,
+}
 
-impl ThinBufferMethods for ThinBuffer {
-    fn data(&self) -> &[u8] {
-        unimplemented!();
-    }
+struct SyncContext {
+    context: Context<'static>,
+}
 
-    fn thin_link_data(&self) -> &[u8] {
-        unimplemented!();
+impl SyncContext {
+    fn new(context: Context<'static>) -> Self {
+        Self { context }
     }
 }
 
-pub struct GccContext {
-    context: Context<'static>,
-    should_combine_object_files: bool,
-    // Temporary directory used by LTO. We keep it here so that it's not removed before linking.
-    temp_dir: Option<TempDir>,
+impl Deref for SyncContext {
+    type Target = Context<'static>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.context
+    }
 }
 
-unsafe impl Send for GccContext {}
-// FIXME(antoyo): that shouldn't be Sync. Parallel compilation is currently disabled with "-Zno-parallel-llvm". Try to disable it here.
-unsafe impl Sync for GccContext {}
+unsafe impl Send for SyncContext {}
+// FIXME(antoyo): that shouldn't be Sync. Parallel compilation is currently disabled with "-Zno-parallel-llvm".
+// TODO: disable it here by returning false in CodegenBackend::supports_parallel().
+unsafe impl Sync for SyncContext {}
 
 impl WriteBackendMethods for GccCodegenBackend {
     type Module = GccContext;
     type TargetMachine = ();
     type TargetMachineError = ();
     type ModuleBuffer = ModuleBuffer;
-    type ThinData = ();
+    type ThinData = ThinData;
     type ThinBuffer = ThinBuffer;
 
     fn run_fat_lto(
@@ -363,11 +380,11 @@ impl WriteBackendMethods for GccCodegenBackend {
     }
 
     fn run_thin_lto(
-        _cgcx: &CodegenContext<Self>,
-        _modules: Vec<(String, Self::ThinBuffer)>,
-        _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
+        cgcx: &CodegenContext<Self>,
+        modules: Vec<(String, Self::ThinBuffer)>,
+        cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
     ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
-        unimplemented!();
+        back::lto::run_thin(cgcx, modules, cached_modules)
     }
 
     fn print_pass_timings(&self) {
@@ -397,10 +414,10 @@ impl WriteBackendMethods for GccCodegenBackend {
     }
 
     unsafe fn optimize_thin(
-        _cgcx: &CodegenContext<Self>,
-        _thin: ThinModule<Self>,
+        cgcx: &CodegenContext<Self>,
+        thin: ThinModule<Self>,
     ) -> Result<ModuleCodegen<Self::Module>, FatalError> {
-        unimplemented!();
+        back::lto::optimize_thin_module(thin, cgcx)
     }
 
     unsafe fn codegen(
@@ -413,10 +430,10 @@ impl WriteBackendMethods for GccCodegenBackend {
     }
 
     fn prepare_thin(
-        _module: ModuleCodegen<Self::Module>,
-        _emit_summary: bool,
+        module: ModuleCodegen<Self::Module>,
+        emit_summary: bool,
     ) -> (String, Self::ThinBuffer) {
-        unimplemented!();
+        back::lto::prepare_thin(module, emit_summary)
     }
 
     fn serialize_module(_module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
@@ -437,7 +454,8 @@ impl WriteBackendMethods for GccCodegenBackend {
 pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
     #[cfg(feature = "master")]
     let info = {
-        // Check whether the target supports 128-bit integers.
+        // Check whether the target supports 128-bit integers and sized floating-point types (like
+        // Float16).
         let context = Context::default();
         Arc::new(Mutex::new(IntoDynSyncSend(context.get_target_info())))
     };
@@ -467,6 +485,7 @@ pub fn target_features(
     allow_unstable: bool,
     target_info: &LockedTargetInfo,
 ) -> Vec<Symbol> {
+    // TODO(antoyo): use global_gcc_features.
     sess.target
         .supported_target_features()
         .iter()
@@ -477,8 +496,12 @@ pub fn target_features(
                 None
             }
         })
-        .filter(|_feature| {
-            target_info.cpu_supports(_feature)
+        .filter(|feature| {
+            // TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
+            if *feature == "neon" {
+                return false;
+            }
+            target_info.cpu_supports(feature)
             /*
               adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
               avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
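The SyncContext refactoring in lib.rs above is an instance of a common pattern: wrap a type that is not thread-safe in a newtype, assert Send/Sync with unsafe impls (justified here only while parallel codegen stays disabled, per the FIXME), and implement Deref so existing call sites keep compiling unchanged. A generic sketch of that shape, with the hypothetical `Inner` standing in for gccjit's Context<'static>:

```rust
use std::ops::Deref;

// Stand-in for a foreign context type; the raw pointer field makes it
// neither Send nor Sync by default.
struct Inner {
    _handle: *mut u8,
}

struct SyncWrapper {
    inner: Inner,
}

// SAFETY: sound only if cross-thread access is externally synchronized,
// mirroring the caveat in the FIXME on SyncContext.
unsafe impl Send for SyncWrapper {}
unsafe impl Sync for SyncWrapper {}

impl Deref for SyncWrapper {
    type Target = Inner;

    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}

fn main() {
    let wrapper = SyncWrapper { inner: Inner { _handle: std::ptr::null_mut() } };
    // Deref lets callers treat the wrapper as the inner context.
    let _inner: &Inner = &wrapper;
}
```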
diff --git a/compiler/rustc_codegen_gcc/src/mono_item.rs b/compiler/rustc_codegen_gcc/src/mono_item.rs
index 359d3c70b4c..44657ad4f6e 100644
--- a/compiler/rustc_codegen_gcc/src/mono_item.rs
+++ b/compiler/rustc_codegen_gcc/src/mono_item.rs
@@ -81,6 +81,6 @@ impl<'gcc, 'tcx> PreDefineMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         // TODO(antoyo): use inline attribute from there in linkage.set() above.
 
         self.functions.borrow_mut().insert(symbol_name.to_string(), decl);
-        self.function_instances.borrow_mut().insert(instance, unsafe { std::mem::transmute(decl) });
+        self.function_instances.borrow_mut().insert(instance, decl);
     }
 }
diff --git a/compiler/rustc_codegen_gcc/src/type_.rs b/compiler/rustc_codegen_gcc/src/type_.rs
index 4caff2e6310..e20234c5ce7 100644
--- a/compiler/rustc_codegen_gcc/src/type_.rs
+++ b/compiler/rustc_codegen_gcc/src/type_.rs
@@ -1,3 +1,8 @@
+#[cfg(feature = "master")]
+use std::convert::TryInto;
+
+#[cfg(feature = "master")]
+use gccjit::CType;
 use gccjit::{RValue, Struct, Type};
 use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::traits::{BaseTypeMethods, DerivedTypeMethods, TypeMembershipMethods};
@@ -142,25 +147,76 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
     }
 
     fn type_f16(&self) -> Type<'gcc> {
-        unimplemented!("f16_f128")
+        #[cfg(feature = "master")]
+        if self.supports_f16_type {
+            return self.context.new_c_type(CType::Float16);
+        }
+        bug!("unsupported float width 16")
     }
 
     fn type_f32(&self) -> Type<'gcc> {
+        #[cfg(feature = "master")]
+        if self.supports_f32_type {
+            return self.context.new_c_type(CType::Float32);
+        }
         self.float_type
     }
 
     fn type_f64(&self) -> Type<'gcc> {
+        #[cfg(feature = "master")]
+        if self.supports_f64_type {
+            return self.context.new_c_type(CType::Float64);
+        }
         self.double_type
     }
 
     fn type_f128(&self) -> Type<'gcc> {
-        unimplemented!("f16_f128")
+        #[cfg(feature = "master")]
+        if self.supports_f128_type {
+            return self.context.new_c_type(CType::Float128);
+        }
+        bug!("unsupported float width 128")
     }
 
     fn type_func(&self, params: &[Type<'gcc>], return_type: Type<'gcc>) -> Type<'gcc> {
         self.context.new_function_pointer_type(None, return_type, params, false)
     }
 
+    #[cfg(feature = "master")]
+    fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
+        if self.is_int_type_or_bool(typ) {
+            TypeKind::Integer
+        } else if typ.get_pointee().is_some() {
+            TypeKind::Pointer
+        } else if typ.is_vector() {
+            TypeKind::Vector
+        } else if typ.dyncast_array().is_some() {
+            TypeKind::Array
+        } else if typ.is_struct().is_some() {
+            TypeKind::Struct
+        } else if typ.dyncast_function_ptr_type().is_some() {
+            TypeKind::Function
+        } else if typ.is_compatible_with(self.float_type) {
+            TypeKind::Float
+        } else if typ.is_compatible_with(self.double_type) {
+            TypeKind::Double
+        } else if typ.is_floating_point() {
+            match typ.get_size() {
+                2 => TypeKind::Half,
+                4 => TypeKind::Float,
+                8 => TypeKind::Double,
+                16 => TypeKind::FP128,
+                size => unreachable!("Floating-point type of size {}", size),
+            }
+        } else if typ == self.type_void() {
+            TypeKind::Void
+        } else {
+            // TODO(antoyo): support other types.
+            unimplemented!();
+        }
+    }
+
+    #[cfg(not(feature = "master"))]
     fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
         if self.is_int_type_or_bool(typ) {
             TypeKind::Integer
@@ -170,9 +226,19 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
             TypeKind::Double
         } else if typ.is_vector() {
             TypeKind::Vector
+        } else if typ.get_pointee().is_some() {
+            TypeKind::Pointer
+        } else if typ.dyncast_array().is_some() {
+            TypeKind::Array
+        } else if typ.is_struct().is_some() {
+            TypeKind::Struct
+        } else if typ.dyncast_function_ptr_type().is_some() {
+            TypeKind::Function
+        } else if typ == self.type_void() {
+            TypeKind::Void
         } else {
             // TODO(antoyo): support other types.
-            TypeKind::Void
+            unimplemented!();
         }
     }
 
@@ -200,6 +266,16 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         unimplemented!();
     }
 
+    #[cfg(feature = "master")]
+    fn float_width(&self, typ: Type<'gcc>) -> usize {
+        if typ.is_floating_point() {
+            (typ.get_size() * u8::BITS).try_into().unwrap()
+        } else {
+            panic!("Cannot get width of float type {:?}", typ);
+        }
+    }
+
+    #[cfg(not(feature = "master"))]
     fn float_width(&self, typ: Type<'gcc>) -> usize {
         let f32 = self.context.new_type::<f32>();
         let f64 = self.context.new_type::<f64>();
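In the master configuration, float_width above no longer compares against a fixed list of float types; it derives the width from the byte size as size * 8. A trivial standalone check of that arithmetic for the four widths the new type_kind match recognizes:

```rust
// Bit width from a floating-point type's size in bytes, as computed by
// the master-feature float_width: size * u8::BITS.
fn float_width_from_size(size_in_bytes: u32) -> u32 {
    size_in_bytes * u8::BITS
}

fn main() {
    assert_eq!(float_width_from_size(2), 16);   // Half (f16)
    assert_eq!(float_width_from_size(4), 32);   // Float (f32)
    assert_eq!(float_width_from_size(8), 64);   // Double (f64)
    assert_eq!(float_width_from_size(16), 128); // FP128 (f128)
}
```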
diff --git a/compiler/rustc_codegen_gcc/src/type_of.rs b/compiler/rustc_codegen_gcc/src/type_of.rs
index a88d50cb434..d85ed4f12ff 100644
--- a/compiler/rustc_codegen_gcc/src/type_of.rs
+++ b/compiler/rustc_codegen_gcc/src/type_of.rs
@@ -8,7 +8,7 @@ use rustc_middle::ty::print::with_no_trimmed_paths;
 use rustc_middle::ty::{self, CoroutineArgsExt, Ty, TypeVisitableExt};
 use rustc_target::abi::call::{CastTarget, FnAbi, Reg};
 use rustc_target::abi::{
-    self, Abi, Align, FieldsShape, Float, Int, Integer, PointeeInfo, Pointer, Size, TyAbiInterface,
+    self, Abi, FieldsShape, Float, Int, Integer, PointeeInfo, Pointer, Size, TyAbiInterface,
     Variants,
 };
 
@@ -53,12 +53,6 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
     }
 }
 
-impl<'a, 'tcx> CodegenCx<'a, 'tcx> {
-    pub fn align_of(&self, ty: Ty<'tcx>) -> Align {
-        self.layout_of(ty).align.abi
-    }
-}
-
 fn uncached_gcc_type<'gcc, 'tcx>(
     cx: &CodegenCx<'gcc, 'tcx>,
     layout: TyAndLayout<'tcx>,
@@ -90,7 +84,7 @@ fn uncached_gcc_type<'gcc, 'tcx>(
         Abi::Uninhabited | Abi::Aggregate { .. } => {}
     }
 
-    let name = match layout.ty.kind() {
+    let name = match *layout.ty.kind() {
         // FIXME(eddyb) producing readable type names for trait objects can result
         // in problematically distinct types due to HRTB and subtyping (see #47638).
         // ty::Dynamic(..) |
@@ -220,7 +214,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
                 // to fn_ptr_backend_type handle the on-stack attribute.
                // TODO(antoyo): find a less hackish way to handle the on-stack attribute.
                 ty::FnPtr(sig) => {
-                    cx.fn_ptr_backend_type(&cx.fn_abi_of_fn_ptr(sig, ty::List::empty()))
+                    cx.fn_ptr_backend_type(cx.fn_abi_of_fn_ptr(sig, ty::List::empty()))
                 }
                 _ => self.scalar_gcc_type_at(cx, scalar, Size::ZERO),
             };