Diffstat (limited to 'compiler/rustc_codegen_gcc/src')
 compiler/rustc_codegen_gcc/src/abi.rs              |  48
 compiler/rustc_codegen_gcc/src/allocator.rs        | 156
 compiler/rustc_codegen_gcc/src/asm.rs              |  21
 compiler/rustc_codegen_gcc/src/attributes.rs       | 133
 compiler/rustc_codegen_gcc/src/back/lto.rs         | 341
 compiler/rustc_codegen_gcc/src/back/mod.rs         |   1
 compiler/rustc_codegen_gcc/src/back/write.rs       | 102
 compiler/rustc_codegen_gcc/src/base.rs             |  75
 compiler/rustc_codegen_gcc/src/builder.rs          | 121
 compiler/rustc_codegen_gcc/src/callee.rs           |   6
 compiler/rustc_codegen_gcc/src/common.rs           |  44
 compiler/rustc_codegen_gcc/src/consts.rs           |  24
 compiler/rustc_codegen_gcc/src/context.rs          |  20
 compiler/rustc_codegen_gcc/src/coverageinfo.rs     |  64
 compiler/rustc_codegen_gcc/src/debuginfo.rs        |   2
 compiler/rustc_codegen_gcc/src/declare.rs          |  42
 compiler/rustc_codegen_gcc/src/errors.rs           | 267
 compiler/rustc_codegen_gcc/src/gcc_util.rs         | 223
 compiler/rustc_codegen_gcc/src/int.rs              |  81
 compiler/rustc_codegen_gcc/src/intrinsic/archs.rs  | 160
 compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs   |  27
 compiler/rustc_codegen_gcc/src/intrinsic/mod.rs    | 224
 compiler/rustc_codegen_gcc/src/intrinsic/simd.rs   | 542
 compiler/rustc_codegen_gcc/src/lib.rs              | 210
 compiler/rustc_codegen_gcc/src/mono_item.rs        |   2
 compiler/rustc_codegen_gcc/src/type_.rs            |  38
 compiler/rustc_codegen_gcc/src/type_of.rs          |  57
 27 files changed, 1904 insertions(+), 1127 deletions(-)
diff --git a/compiler/rustc_codegen_gcc/src/abi.rs b/compiler/rustc_codegen_gcc/src/abi.rs
index 6fb1cbfad8c..35bb0b6e5f4 100644
--- a/compiler/rustc_codegen_gcc/src/abi.rs
+++ b/compiler/rustc_codegen_gcc/src/abi.rs
@@ -3,7 +3,9 @@ use rustc_codegen_ssa::traits::{AbiBuilderMethods, BaseTypeMethods};
 use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::bug;
 use rustc_middle::ty::Ty;
-use rustc_target::abi::call::{CastTarget, FnAbi, PassMode, Reg, RegKind};
+#[cfg(feature = "master")]
+use rustc_session::config;
+use rustc_target::abi::call::{ArgAttributes, CastTarget, FnAbi, PassMode, Reg, RegKind};
 
 use crate::builder::Builder;
 use crate::context::CodegenCx;
@@ -113,37 +115,57 @@ impl<'gcc, 'tcx> FnAbiGccExt<'gcc, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
             match self.ret.mode {
                 PassMode::Ignore => cx.type_void(),
                 PassMode::Direct(_) | PassMode::Pair(..) => self.ret.layout.immediate_gcc_type(cx),
-                PassMode::Cast(ref cast, _) => cast.gcc_type(cx),
+                PassMode::Cast { ref cast, .. } => cast.gcc_type(cx),
                 PassMode::Indirect { .. } => {
                     argument_tys.push(cx.type_ptr_to(self.ret.memory_ty(cx)));
                     cx.type_void()
                 }
             };
 
+        #[cfg(feature = "master")]
+        let apply_attrs = |ty: Type<'gcc>, attrs: &ArgAttributes| {
+            if cx.sess().opts.optimize != config::OptLevel::No
+                && attrs.regular.contains(rustc_target::abi::call::ArgAttribute::NoAlias)
+            {
+                ty.make_restrict()
+            } else {
+                ty
+            }
+        };
+        #[cfg(not(feature = "master"))]
+        let apply_attrs = |ty: Type<'gcc>, _attrs: &ArgAttributes| {
+            ty
+        };
+
         for arg in self.args.iter() {
             let arg_ty = match arg.mode {
                 PassMode::Ignore => continue,
-                PassMode::Direct(_) => arg.layout.immediate_gcc_type(cx),
-                PassMode::Pair(..) => {
-                    argument_tys.push(arg.layout.scalar_pair_element_gcc_type(cx, 0, true));
-                    argument_tys.push(arg.layout.scalar_pair_element_gcc_type(cx, 1, true));
+                PassMode::Pair(a, b) => {
+                    argument_tys.push(apply_attrs(arg.layout.scalar_pair_element_gcc_type(cx, 0), &a));
+                    argument_tys.push(apply_attrs(arg.layout.scalar_pair_element_gcc_type(cx, 1), &b));
                     continue;
                 }
-                PassMode::Indirect { extra_attrs: Some(_), .. } => {
-                    unimplemented!();
-                }
-                PassMode::Cast(ref cast, pad_i32) => {
+                PassMode::Cast { ref cast, pad_i32 } => {
                     // add padding
                     if pad_i32 {
                         argument_tys.push(Reg::i32().gcc_type(cx));
                     }
-                    cast.gcc_type(cx)
+                    let ty = cast.gcc_type(cx);
+                    apply_attrs(ty, &cast.attrs)
                 }
-                PassMode::Indirect { extra_attrs: None, on_stack: true, .. } => {
+                PassMode::Indirect { attrs: _, meta_attrs: None, on_stack: true } => {
+                    // This is a "byval" argument, so we don't apply the `restrict` attribute to it.
                     on_stack_param_indices.insert(argument_tys.len());
                     arg.memory_ty(cx)
                 },
-                PassMode::Indirect { extra_attrs: None, on_stack: false, .. } => cx.type_ptr_to(arg.memory_ty(cx)),
+                PassMode::Direct(attrs) => apply_attrs(arg.layout.immediate_gcc_type(cx), &attrs),
+                PassMode::Indirect { attrs, meta_attrs: None, on_stack: false } => {
+                    apply_attrs(cx.type_ptr_to(arg.memory_ty(cx)), &attrs)
+                }
+                PassMode::Indirect { attrs, meta_attrs: Some(meta_attrs), on_stack } => {
+                    assert!(!on_stack);
+                    apply_attrs(apply_attrs(cx.type_ptr_to(arg.memory_ty(cx)), &attrs), &meta_attrs)
+                }
             };
             argument_tys.push(arg_ty);
         }
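
The new `apply_attrs` closure above maps Rust's `NoAlias` argument attribute
onto GCC's `restrict` qualifier when optimizations are enabled. A minimal
sketch of that mapping, assuming a hypothetical standalone helper (the real
logic lives inline in the closure, and `make_restrict` needs the gccjit
`master` feature):

    // Sketch only: mirror LLVM's `noalias` by marking the parameter's
    // pointer type `restrict` at -O1 and above.
    fn restrict_if_noalias<'gcc>(
        optimize: bool,
        noalias: bool,
        ty: gccjit::Type<'gcc>,
    ) -> gccjit::Type<'gcc> {
        if optimize && noalias { ty.make_restrict() } else { ty }
    }
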
diff --git a/compiler/rustc_codegen_gcc/src/allocator.rs b/compiler/rustc_codegen_gcc/src/allocator.rs
index e90db44ece1..c8c098e2973 100644
--- a/compiler/rustc_codegen_gcc/src/allocator.rs
+++ b/compiler/rustc_codegen_gcc/src/allocator.rs
@@ -1,14 +1,17 @@
 #[cfg(feature="master")]
 use gccjit::FnAttribute;
-use gccjit::{FunctionType, GlobalKind, ToRValue};
-use rustc_ast::expand::allocator::{AllocatorKind, AllocatorTy, ALLOCATOR_METHODS};
+use gccjit::{Context, FunctionType, GlobalKind, ToRValue, Type};
+use rustc_ast::expand::allocator::{
+    alloc_error_handler_name, default_fn_name, global_fn_name, AllocatorKind, AllocatorTy,
+    ALLOCATOR_METHODS, NO_ALLOC_SHIM_IS_UNSTABLE,
+};
 use rustc_middle::bug;
 use rustc_middle::ty::TyCtxt;
 use rustc_session::config::OomStrategy;
 
 use crate::GccContext;
 
-pub(crate) unsafe fn codegen(tcx: TyCtxt<'_>, mods: &mut GccContext, _module_name: &str, kind: AllocatorKind) {
+pub(crate) unsafe fn codegen(tcx: TyCtxt<'_>, mods: &mut GccContext, _module_name: &str, kind: AllocatorKind, alloc_error_handler_kind: AllocatorKind) {
     let context = &mods.context;
     let usize =
         match tcx.sess.target.pointer_width {
@@ -19,76 +22,105 @@ pub(crate) unsafe fn codegen(tcx: TyCtxt<'_>, mods: &mut GccContext, _module_nam
         };
     let i8 = context.new_type::<i8>();
     let i8p = i8.make_pointer();
-    let void = context.new_type::<()>();
-
-    for method in ALLOCATOR_METHODS {
-        let mut types = Vec::with_capacity(method.inputs.len());
-        for ty in method.inputs.iter() {
-            match *ty {
-                AllocatorTy::Layout => {
-                    types.push(usize);
-                    types.push(usize);
-                }
-                AllocatorTy::Ptr => types.push(i8p),
-                AllocatorTy::Usize => types.push(usize),
 
-                AllocatorTy::ResultPtr | AllocatorTy::Unit => panic!("invalid allocator arg"),
-            }
-        }
-        let output = match method.output {
-            AllocatorTy::ResultPtr => Some(i8p),
-            AllocatorTy::Unit => None,
+    if kind == AllocatorKind::Default {
+        for method in ALLOCATOR_METHODS {
+            let mut types = Vec::with_capacity(method.inputs.len());
+            for input in method.inputs.iter() {
+                match input.ty {
+                    AllocatorTy::Layout => {
+                        types.push(usize);
+                        types.push(usize);
+                    }
+                    AllocatorTy::Ptr => types.push(i8p),
+                    AllocatorTy::Usize => types.push(usize),
 
-            AllocatorTy::Layout | AllocatorTy::Usize | AllocatorTy::Ptr => {
-                panic!("invalid allocator output")
+                    AllocatorTy::ResultPtr | AllocatorTy::Unit => panic!("invalid allocator arg"),
+                }
             }
-        };
-        let name = format!("__rust_{}", method.name);
-
-        let args: Vec<_> = types.iter().enumerate()
-            .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index)))
-            .collect();
-        let func = context.new_function(None, FunctionType::Exported, output.unwrap_or(void), &args, name, false);
+            let output = match method.output {
+                AllocatorTy::ResultPtr => Some(i8p),
+                AllocatorTy::Unit => None,
 
-        if tcx.sess.target.options.default_hidden_visibility {
-            #[cfg(feature="master")]
-            func.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden));
-        }
-        if tcx.sess.must_emit_unwind_tables() {
-            // TODO(antoyo): emit unwind tables.
-        }
+                AllocatorTy::Layout | AllocatorTy::Usize | AllocatorTy::Ptr => {
+                    panic!("invalid allocator output")
+                }
+            };
+            let from_name = global_fn_name(method.name);
+            let to_name = default_fn_name(method.name);
 
-        let callee = kind.fn_name(method.name);
-        let args: Vec<_> = types.iter().enumerate()
-            .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index)))
-            .collect();
-        let callee = context.new_function(None, FunctionType::Extern, output.unwrap_or(void), &args, callee, false);
-        #[cfg(feature="master")]
-        callee.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden));
-
-        let block = func.new_block("entry");
-
-        let args = args
-            .iter()
-            .enumerate()
-            .map(|(i, _)| func.get_param(i as i32).to_rvalue())
-            .collect::<Vec<_>>();
-        let ret = context.new_call(None, callee, &args);
-        //llvm::LLVMSetTailCall(ret, True);
-        if output.is_some() {
-            block.end_with_return(None, ret);
+            create_wrapper_function(tcx, context, &from_name, &to_name, &types, output);
         }
-        else {
-            block.end_with_void_return(None);
-        }
-
-        // TODO(@Commeownist): Check if we need to emit some extra debugging info in certain circumstances
-        // as described in https://github.com/rust-lang/rust/commit/77a96ed5646f7c3ee8897693decc4626fe380643
     }
 
+    // FIXME(bjorn3): Add noreturn attribute
+    create_wrapper_function(
+        tcx,
+        context,
+        "__rust_alloc_error_handler",
+        &alloc_error_handler_name(alloc_error_handler_kind),
+        &[usize, usize],
+        None,
+    );
+
     let name = OomStrategy::SYMBOL.to_string();
     let global = context.new_global(None, GlobalKind::Exported, i8, name);
     let value = tcx.sess.opts.unstable_opts.oom.should_panic();
     let value = context.new_rvalue_from_int(i8, value as i32);
     global.global_set_initializer_rvalue(value);
+
+    let name = NO_ALLOC_SHIM_IS_UNSTABLE.to_string();
+    let global = context.new_global(None, GlobalKind::Exported, i8, name);
+    let value = context.new_rvalue_from_int(i8, 0);
+    global.global_set_initializer_rvalue(value);
+}
+
+fn create_wrapper_function(
+    tcx: TyCtxt<'_>,
+    context: &Context<'_>,
+    from_name: &str,
+    to_name: &str,
+    types: &[Type<'_>],
+    output: Option<Type<'_>>,
+) {
+    let void = context.new_type::<()>();
+
+    let args: Vec<_> = types.iter().enumerate()
+        .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index)))
+        .collect();
+    let func = context.new_function(None, FunctionType::Exported, output.unwrap_or(void), &args, from_name, false);
+
+    if tcx.sess.target.options.default_hidden_visibility {
+        #[cfg(feature="master")]
+        func.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden));
+    }
+    if tcx.sess.must_emit_unwind_tables() {
+        // TODO(antoyo): emit unwind tables.
+    }
+
+    let args: Vec<_> = types.iter().enumerate()
+        .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index)))
+        .collect();
+    let callee = context.new_function(None, FunctionType::Extern, output.unwrap_or(void), &args, to_name, false);
+    #[cfg(feature="master")]
+    callee.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden));
+
+    let block = func.new_block("entry");
+
+    let args = args
+        .iter()
+        .enumerate()
+        .map(|(i, _)| func.get_param(i as i32).to_rvalue())
+        .collect::<Vec<_>>();
+    let ret = context.new_call(None, callee, &args);
+    //llvm::LLVMSetTailCall(ret, True);
+    if output.is_some() {
+        block.end_with_return(None, ret);
+    }
+    else {
+        block.end_with_void_return(None);
+    }
+
+    // TODO(@Commeownist): Check if we need to emit some extra debugging info in certain circumstances
+    // as described in https://github.com/rust-lang/rust/commit/77a96ed5646f7c3ee8897693decc4626fe380643
 }
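
With the default allocator, `create_wrapper_function` emits plain forwarding
shims from the `__rust_*` entry points to the default implementations.
Roughly, the generated `__rust_alloc` behaves like the following hand-written
Rust; the `__rdl_alloc` callee name is what `default_fn_name` should produce
and is shown here only for illustration:

    // Illustrative sketch of the emitted shim; not the actual GIMPLE.
    extern "C" {
        // Assumed name of the default-allocator implementation.
        fn __rdl_alloc(size: usize, align: usize) -> *mut u8;
    }

    #[no_mangle]
    pub unsafe extern "C" fn __rust_alloc(size: usize, align: usize) -> *mut u8 {
        // The wrapper forwards all parameters to the callee unchanged.
        __rdl_alloc(size, align)
    }
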
diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs
index 65de02b3567..f3a9ca77a67 100644
--- a/compiler/rustc_codegen_gcc/src/asm.rs
+++ b/compiler/rustc_codegen_gcc/src/asm.rs
@@ -107,7 +107,7 @@ enum ConstraintOrRegister {
 
 
 impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
-    fn codegen_inline_asm(&mut self, template: &[InlineAsmTemplatePiece], rust_operands: &[InlineAsmOperandRef<'tcx, Self>], options: InlineAsmOptions, span: &[Span], _instance: Instance<'_>, _dest_catch_funclet: Option<(Self::BasicBlock, Self::BasicBlock, Option<&Self::Funclet>)>) {
+    fn codegen_inline_asm(&mut self, template: &[InlineAsmTemplatePiece], rust_operands: &[InlineAsmOperandRef<'tcx, Self>], options: InlineAsmOptions, span: &[Span], instance: Instance<'_>, _dest_catch_funclet: Option<(Self::BasicBlock, Self::BasicBlock, Option<&Self::Funclet>)>) {
         if options.contains(InlineAsmOptions::MAY_UNWIND) {
             self.sess()
                 .create_err(UnwindingInlineAsm { span: span[0] })
@@ -173,7 +173,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                             let is_target_supported = reg.reg_class().supported_types(asm_arch).iter()
                                 .any(|&(_, feature)| {
                                     if let Some(feature) = feature {
-                                        self.tcx.sess.target_features.contains(&feature)
+                                        self.tcx.asm_target_features(instance.def_id()).contains(&feature)
                                     } else {
                                         true // Register class is unconditionally supported
                                     }
@@ -452,10 +452,6 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                         }
 
                         InlineAsmOperandRef::Const { ref string } => {
-                            // Const operands get injected directly into the template
-                            if att_dialect {
-                                template_str.push('$');
-                            }
                             template_str.push_str(string);
                         }
                     }
@@ -501,7 +497,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
         if options.contains(InlineAsmOptions::NORETURN) {
             let builtin_unreachable = self.context.get_builtin_function("__builtin_unreachable");
             let builtin_unreachable: RValue<'gcc> = unsafe { std::mem::transmute(builtin_unreachable) };
-            self.call(self.type_void(), None, builtin_unreachable, &[], None);
+            self.call(self.type_void(), None, None, builtin_unreachable, &[], None);
         }
 
         // Write results to outputs.
@@ -518,7 +514,6 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                 OperandValue::Immediate(op.tmp_var.to_rvalue()).store(self, place);
             }
         }
-
     }
 }
 
@@ -593,9 +588,13 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister {
             InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::reg) => "r",
             InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::wreg) => "w",
             InlineAsmRegClass::Hexagon(HexagonInlineAsmRegClass::reg) => "r",
+            InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::reg) => "r",
+            InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::freg) => "f",
             InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg) => "r",
             InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_addr) => "a",
             InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_data) => "d",
+            InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::reg) => "r",
+            InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::freg) => "f",
             InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg) => "d", // more specific than "r"
             InlineAsmRegClass::Mips(MipsInlineAsmRegClass::freg) => "f",
             InlineAsmRegClass::Msp430(Msp430InlineAsmRegClass::reg) => "r",
@@ -667,9 +666,13 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
         InlineAsmRegClass::Avr(_) => unimplemented!(),
         InlineAsmRegClass::Bpf(_) => unimplemented!(),
         InlineAsmRegClass::Hexagon(HexagonInlineAsmRegClass::reg) => cx.type_i32(),
+        InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::reg) => cx.type_i32(),
+        InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::freg) => cx.type_f32(),
         InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_addr) => cx.type_i32(),
         InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_data) => cx.type_i32(),
+        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::reg) => cx.type_i32(),
+        InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::freg) => cx.type_f32(),
         InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::Mips(MipsInlineAsmRegClass::freg) => cx.type_f32(),
         InlineAsmRegClass::Msp430(_) => unimplemented!(),
@@ -804,6 +807,7 @@ fn modifier_to_gcc(arch: InlineAsmArch, reg: InlineAsmRegClass, modifier: Option
             }
         }
         InlineAsmRegClass::Hexagon(_) => None,
+        InlineAsmRegClass::LoongArch(_) => None,
         InlineAsmRegClass::Mips(_) => None,
         InlineAsmRegClass::Nvptx(_) => None,
         InlineAsmRegClass::PowerPC(_) => None,
@@ -856,6 +860,7 @@ fn modifier_to_gcc(arch: InlineAsmArch, reg: InlineAsmRegClass, modifier: Option
         InlineAsmRegClass::S390x(_) => None,
         InlineAsmRegClass::Msp430(_) => None,
         InlineAsmRegClass::M68k(_) => None,
+        InlineAsmRegClass::CSKY(_) => None,
         InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => {
             bug!("LLVM backend does not support SPIR-V")
         }
diff --git a/compiler/rustc_codegen_gcc/src/attributes.rs b/compiler/rustc_codegen_gcc/src/attributes.rs
index db841b1b524..971e019a4f6 100644
--- a/compiler/rustc_codegen_gcc/src/attributes.rs
+++ b/compiler/rustc_codegen_gcc/src/attributes.rs
@@ -2,68 +2,31 @@
 use gccjit::FnAttribute;
 use gccjit::Function;
 use rustc_attr::InstructionSetAttr;
-use rustc_codegen_ssa::target_features::tied_target_features;
-use rustc_data_structures::fx::FxHashMap;
+#[cfg(feature="master")]
+use rustc_attr::InlineAttr;
 use rustc_middle::ty;
-use rustc_session::Session;
+#[cfg(feature="master")]
+use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_span::symbol::sym;
-use smallvec::{smallvec, SmallVec};
 
 use crate::{context::CodegenCx, errors::TiedTargetFeatures};
+use crate::gcc_util::{check_tied_features, to_gcc_features};
 
-// Given a map from target_features to whether they are enabled or disabled,
-// ensure only valid combinations are allowed.
-pub fn check_tied_features(sess: &Session, features: &FxHashMap<&str, bool>) -> Option<&'static [&'static str]> {
-    for tied in tied_target_features(sess) {
-        // Tied features must be set to the same value, or not set at all
-        let mut tied_iter = tied.iter();
-        let enabled = features.get(tied_iter.next().unwrap());
-        if tied_iter.any(|feature| enabled != features.get(feature)) {
-            return Some(tied);
+/// Get GCC attribute for the provided inline heuristic.
+#[cfg(feature="master")]
+#[inline]
+fn inline_attr<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, inline: InlineAttr) -> Option<FnAttribute<'gcc>> {
+    match inline {
+        InlineAttr::Hint => Some(FnAttribute::Inline),
+        InlineAttr::Always => Some(FnAttribute::AlwaysInline),
+        InlineAttr::Never => {
+            if cx.sess().target.arch != "amdgpu" {
+                Some(FnAttribute::NoInline)
+            } else {
+                None
+            }
         }
-    }
-    None
-}
-
-// TODO(antoyo): maybe move to a new module gcc_util.
-// To find a list of GCC's names, check https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
-fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
-    let arch = if sess.target.arch == "x86_64" { "x86" } else { &*sess.target.arch };
-    match (arch, s) {
-        ("x86", "sse4.2") => smallvec!["sse4.2", "crc32"],
-        ("x86", "pclmulqdq") => smallvec!["pclmul"],
-        ("x86", "rdrand") => smallvec!["rdrnd"],
-        ("x86", "bmi1") => smallvec!["bmi"],
-        ("x86", "cmpxchg16b") => smallvec!["cx16"],
-        ("x86", "avx512vaes") => smallvec!["vaes"],
-        ("x86", "avx512gfni") => smallvec!["gfni"],
-        ("x86", "avx512vpclmulqdq") => smallvec!["vpclmulqdq"],
-        // NOTE: seems like GCC requires 'avx512bw' for 'avx512vbmi2'.
-        ("x86", "avx512vbmi2") => smallvec!["avx512vbmi2", "avx512bw"],
-        // NOTE: seems like GCC requires 'avx512bw' for 'avx512bitalg'.
-        ("x86", "avx512bitalg") => smallvec!["avx512bitalg", "avx512bw"],
-        ("aarch64", "rcpc2") => smallvec!["rcpc-immo"],
-        ("aarch64", "dpb") => smallvec!["ccpp"],
-        ("aarch64", "dpb2") => smallvec!["ccdp"],
-        ("aarch64", "frintts") => smallvec!["fptoint"],
-        ("aarch64", "fcma") => smallvec!["complxnum"],
-        ("aarch64", "pmuv3") => smallvec!["perfmon"],
-        ("aarch64", "paca") => smallvec!["pauth"],
-        ("aarch64", "pacg") => smallvec!["pauth"],
-        // Rust ties fp and neon together. In LLVM neon implicitly enables fp,
-        // but we manually enable neon when a feature only implicitly enables fp
-        ("aarch64", "f32mm") => smallvec!["f32mm", "neon"],
-        ("aarch64", "f64mm") => smallvec!["f64mm", "neon"],
-        ("aarch64", "fhm") => smallvec!["fp16fml", "neon"],
-        ("aarch64", "fp16") => smallvec!["fullfp16", "neon"],
-        ("aarch64", "jsconv") => smallvec!["jsconv", "neon"],
-        ("aarch64", "sve") => smallvec!["sve", "neon"],
-        ("aarch64", "sve2") => smallvec!["sve2", "neon"],
-        ("aarch64", "sve2-aes") => smallvec!["sve2-aes", "neon"],
-        ("aarch64", "sve2-sm4") => smallvec!["sve2-sm4", "neon"],
-        ("aarch64", "sve2-sha3") => smallvec!["sve2-sha3", "neon"],
-        ("aarch64", "sve2-bitperm") => smallvec!["sve2-bitperm", "neon"],
-        (_, s) => smallvec![s],
+        InlineAttr::None => None,
     }
 }
 
@@ -77,6 +40,36 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
 ) {
     let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id());
 
+    #[cfg(feature="master")]
+    {
+        let inline =
+            if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
+                InlineAttr::Never
+            }
+            else if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
+                InlineAttr::Hint
+            }
+            else {
+                codegen_fn_attrs.inline
+            };
+        if let Some(attr) = inline_attr(cx, inline) {
+            func.add_attribute(attr);
+        }
+
+        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
+            func.add_attribute(FnAttribute::Cold);
+        }
+        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_RETURNS_TWICE) {
+            func.add_attribute(FnAttribute::ReturnsTwice);
+        }
+        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_PURE) {
+            func.add_attribute(FnAttribute::Pure);
+        }
+        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_CONST) {
+            func.add_attribute(FnAttribute::Const);
+        }
+    }
+
     let function_features =
         codegen_fn_attrs.target_features.iter().map(|features| features.as_str()).collect::<Vec<&str>>();
 
@@ -101,11 +94,33 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
         }))
         .collect::<Vec<_>>();
 
-    // TODO(antoyo): check if we really need global backend features. (Maybe they could be applied
-    // globally?)
+    // TODO(antoyo): cg_llvm adds global features to each function so that LTO keeps them.
+    // Check if GCC requires the same.
     let mut global_features = cx.tcx.global_backend_features(()).iter().map(|s| s.as_str());
     function_features.extend(&mut global_features);
-    let target_features = function_features.join(",");
+    let target_features = function_features
+        .iter()
+        .filter_map(|feature| {
+            // FIXME(antoyo): for some reason, disabling SSE results in the following error when
+            // compiling Rust for Linux:
+            // SSE register return with SSE disabled
+            // TODO(antoyo): support soft-float and retpoline-external-thunk.
+            if feature.contains("soft-float") || feature.contains("retpoline-external-thunk") || *feature == "-sse" {
+                return None;
+            }
+
+            if feature.starts_with('-') {
+                Some(format!("no{}", feature))
+            }
+            else if feature.starts_with('+') {
+                Some(feature[1..].to_string())
+            }
+            else {
+                Some(feature.to_string())
+            }
+        })
+        .collect::<Vec<_>>()
+        .join(",");
     if !target_features.is_empty() {
         #[cfg(feature="master")]
         func.add_attribute(FnAttribute::Target(&target_features));
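
The `target_features` mapping above converts LLVM-style feature strings into
the syntax GCC expects in its `target("...")` function attribute. A minimal
sketch of the per-feature translation as a hypothetical standalone helper:

    // Sketch: "+avx2" -> "avx2", "-avx" -> "no-avx"; soft-float,
    // retpoline-external-thunk and "-sse" are skipped, as in the patch.
    fn to_gcc_target_attr_feature(feature: &str) -> Option<String> {
        if feature.contains("soft-float")
            || feature.contains("retpoline-external-thunk")
            || feature == "-sse"
        {
            return None;
        }
        if let Some(disabled) = feature.strip_prefix('-') {
            Some(format!("no-{}", disabled))
        } else if let Some(enabled) = feature.strip_prefix('+') {
            Some(enabled.to_string())
        } else {
            Some(feature.to_string())
        }
    }
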
diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs
new file mode 100644
index 00000000000..529454b119e
--- /dev/null
+++ b/compiler/rustc_codegen_gcc/src/back/lto.rs
@@ -0,0 +1,341 @@
+//! GCC requires using the same toolchain for the whole compilation when doing LTO,
+//! so we need the same version/commit of the linker (gcc) and LTO front-end binaries
+//! (lto1, lto-wrapper, liblto_plugin.so).
+
+// FIXME(antoyo): the executables compiled with LTO are bigger than those compiled without LTO.
+// Since it is the opposite for cg_llvm, check if this is normal.
+//
+// Maybe we embed the bitcode in the final binary?
+// It doesn't look like we try to generate fat objects for the final binary.
+// Check if the way we combine the object files makes the final link keep the LTO sections.
+// Maybe that's because the combined object files contain the IR (true) and the final link
+// does not remove it?
+//
+// TODO(antoyo): for performance, check which optimizations the C++ frontend enables.
+//
+// Fix these warnings:
+// /usr/bin/ld: warning: type of symbol `_RNvNvNvNtCs5JWOrf9uCus_5rayon11thread_pool19WORKER_THREAD_STATE7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
+// /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
+// /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization
+
+use std::ffi::CString;
+use std::fs::{self, File};
+use std::path::{Path, PathBuf};
+
+use gccjit::OutputKind;
+use object::read::archive::ArchiveFile;
+use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule};
+use rustc_codegen_ssa::back::symbol_export;
+use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
+use rustc_codegen_ssa::traits::*;
+use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
+use rustc_data_structures::memmap::Mmap;
+use rustc_errors::{FatalError, Handler};
+use rustc_hir::def_id::LOCAL_CRATE;
+use rustc_middle::dep_graph::WorkProduct;
+use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel};
+use rustc_session::config::{CrateType, Lto};
+use tempfile::{TempDir, tempdir};
+
+use crate::back::write::save_temp_bitcode;
+use crate::errors::{
+    DynamicLinkingWithLTO, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib,
+};
+use crate::{GccCodegenBackend, GccContext, to_gcc_opt_level};
+
+// We keep track of the computed LTO cache keys from the previous
+// session to determine which CGUs we can reuse.
+//pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin";
+
+pub fn crate_type_allows_lto(crate_type: CrateType) -> bool {
+    match crate_type {
+        CrateType::Executable | CrateType::Dylib | CrateType::Staticlib | CrateType::Cdylib => true,
+        CrateType::Rlib | CrateType::ProcMacro => false,
+    }
+}
+
+struct LtoData {
+    // TODO(antoyo): use symbols_below_threshold.
+    //symbols_below_threshold: Vec<CString>,
+    upstream_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
+    tmp_path: TempDir,
+}
+
+fn prepare_lto(cgcx: &CodegenContext<GccCodegenBackend>, diag_handler: &Handler) -> Result<LtoData, FatalError> {
+    let export_threshold = match cgcx.lto {
+        // We're just doing LTO for our one crate
+        Lto::ThinLocal => SymbolExportLevel::Rust,
+
+        // We're doing LTO for the entire crate graph
+        Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types),
+
+        Lto::No => panic!("didn't request LTO but we're doing LTO"),
+    };
+
+    let tmp_path =
+        match tempdir() {
+            Ok(tmp_path) => tmp_path,
+            Err(error) => {
+                eprintln!("Cannot create temporary directory: {}", error);
+                return Err(FatalError);
+            },
+        };
+
+    let symbol_filter = &|&(ref name, info): &(String, SymbolExportInfo)| {
+        if info.level.is_below_threshold(export_threshold) || info.used {
+            Some(CString::new(name.as_str()).unwrap())
+        } else {
+            None
+        }
+    };
+    let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
+    let mut symbols_below_threshold = {
+        let _timer = cgcx.prof.generic_activity("GCC_lto_generate_symbols_below_threshold");
+        exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>()
+    };
+    info!("{} symbols to preserve in this crate", symbols_below_threshold.len());
+
+    // If we're performing LTO for the entire crate graph, then for each of our
+    // upstream dependencies, find the corresponding rlib and load the bitcode
+    // from the archive.
+    //
+    // We save off all the bytecode and GCC module file path for later processing
+    // with either fat or thin LTO
+    let mut upstream_modules = Vec::new();
+    if cgcx.lto != Lto::ThinLocal {
+        // Make sure we actually can run LTO
+        for crate_type in cgcx.crate_types.iter() {
+            if !crate_type_allows_lto(*crate_type) {
+                diag_handler.emit_err(LtoDisallowed);
+                return Err(FatalError);
+            } else if *crate_type == CrateType::Dylib {
+                if !cgcx.opts.unstable_opts.dylib_lto {
+                    diag_handler.emit_err(LtoDylib);
+                    return Err(FatalError);
+                }
+            }
+        }
+
+        if cgcx.opts.cg.prefer_dynamic && !cgcx.opts.unstable_opts.dylib_lto {
+            diag_handler.emit_err(DynamicLinkingWithLTO);
+            return Err(FatalError);
+        }
+
+        for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
+            let exported_symbols =
+                cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
+            {
+                let _timer =
+                    cgcx.prof.generic_activity("GCC_lto_generate_symbols_below_threshold");
+                symbols_below_threshold
+                    .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter));
+            }
+
+            let archive_data = unsafe {
+                Mmap::map(File::open(&path).expect("couldn't open rlib"))
+                    .expect("couldn't map rlib")
+            };
+            let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib");
+            let obj_files = archive
+                .members()
+                .filter_map(|child| {
+                    child.ok().and_then(|c| {
+                        std::str::from_utf8(c.name()).ok().map(|name| (name.trim(), c))
+                    })
+                })
+                .filter(|&(name, _)| looks_like_rust_object_file(name));
+            for (name, child) in obj_files {
+                info!("adding bitcode from {}", name);
+                let path = tmp_path.path().join(name);
+                match save_as_file(child.data(&*archive_data).expect("corrupt rlib"), &path) {
+                    Ok(()) => {
+                        let buffer = ModuleBuffer::new(path);
+                        let module = SerializedModule::Local(buffer);
+                        upstream_modules.push((module, CString::new(name).unwrap()));
+                    }
+                    Err(e) => {
+                        diag_handler.emit_err(e);
+                        return Err(FatalError);
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(LtoData {
+        //symbols_below_threshold,
+        upstream_modules,
+        tmp_path,
+    })
+}
+
+fn save_as_file(obj: &[u8], path: &Path) -> Result<(), LtoBitcodeFromRlib> {
+    fs::write(path, obj)
+        .map_err(|error| LtoBitcodeFromRlib {
+            gcc_err: format!("write object file to temp dir: {}", error)
+        })
+}
+
+/// Performs fat LTO by merging all modules into a single one and returning it
+/// for further optimization.
+pub(crate) fn run_fat(
+    cgcx: &CodegenContext<GccCodegenBackend>,
+    modules: Vec<FatLtoInput<GccCodegenBackend>>,
+    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
+) -> Result<LtoModuleCodegen<GccCodegenBackend>, FatalError> {
+    let diag_handler = cgcx.create_diag_handler();
+    let lto_data = prepare_lto(cgcx, &diag_handler)?;
+    /*let symbols_below_threshold =
+        lto_data.symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();*/
+    fat_lto(cgcx, &diag_handler, modules, cached_modules, lto_data.upstream_modules, lto_data.tmp_path,
+        //&symbols_below_threshold,
+    )
+}
+
+fn fat_lto(cgcx: &CodegenContext<GccCodegenBackend>, _diag_handler: &Handler, modules: Vec<FatLtoInput<GccCodegenBackend>>, cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, tmp_path: TempDir,
+    //symbols_below_threshold: &[*const libc::c_char],
+) -> Result<LtoModuleCodegen<GccCodegenBackend>, FatalError> {
+    let _timer = cgcx.prof.generic_activity("GCC_fat_lto_build_monolithic_module");
+    info!("going for a fat lto");
+
+    // Sort out all our lists of incoming modules into two lists.
+    //
+    // * `serialized_modules` (also an argument to this function) contains all
+    //   modules that are serialized in-memory.
+    // * `in_memory` contains modules which are already parsed and in-memory,
+    //   such as from multi-CGU builds.
+    //
+    // All of `cached_modules` (cached from previous incremental builds) can
+    // immediately go onto the `serialized_modules` modules list and then we can
+    // split the `modules` array into these two lists.
+    let mut in_memory = Vec::new();
+    serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
+        info!("pushing cached module {:?}", wp.cgu_name);
+        (buffer, CString::new(wp.cgu_name).unwrap())
+    }));
+    for module in modules {
+        match module {
+            FatLtoInput::InMemory(m) => in_memory.push(m),
+            FatLtoInput::Serialized { name, buffer } => {
+                info!("pushing serialized module {:?}", name);
+                let buffer = SerializedModule::Local(buffer);
+                serialized_modules.push((buffer, CString::new(name).unwrap()));
+            }
+        }
+    }
+
+    // Find the "costliest" module and merge everything into that codegen unit.
+    // All the other modules will be serialized and reparsed into the new
+    // context, so this hopefully avoids serializing and parsing the largest
+    // codegen unit.
+    //
+    // Additionally use a regular module as the base here to ensure that various
+    // file copy operations in the backend work correctly. The only other kind
+    // of module here should be an allocator one, and if your crate is smaller
+    // than the allocator module then the size doesn't really matter anyway.
+    let costliest_module = in_memory
+        .iter()
+        .enumerate()
+        .filter(|&(_, module)| module.kind == ModuleKind::Regular)
+        .map(|(i, _module)| {
+            //let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
+            // TODO(antoyo): compute the cost of a module if GCC allows this.
+            (0, i)
+        })
+        .max();
+
+    // If we found a costliest module, we're good to go. Otherwise all our
+    // inputs were serialized which could happen in the case, for example, that
+    // all our inputs were incrementally reread from the cache and we're just
+    // re-executing the LTO passes. If that's the case deserialize the first
+    // module and create a linker with it.
+    let mut module: ModuleCodegen<GccContext> = match costliest_module {
+        Some((_cost, i)) => in_memory.remove(i),
+        None => {
+            unimplemented!("Incremental");
+            /*assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
+            let (buffer, name) = serialized_modules.remove(0);
+            info!("no in-memory regular modules to choose from, parsing {:?}", name);
+            ModuleCodegen {
+                module_llvm: GccContext::parse(cgcx, &name, buffer.data(), diag_handler)?,
+                name: name.into_string().unwrap(),
+                kind: ModuleKind::Regular,
+            }*/
+        }
+    };
+    let mut serialized_bitcode = Vec::new();
+    {
+        info!("using {:?} as a base module", module.name);
+
+        // We cannot load and merge GCC contexts in memory like cg_llvm does.
+        // Instead, we combine the object files into a single object file.
+        for module in in_memory {
+            let path = tmp_path.path().to_path_buf().join(&module.name);
+            let path = path.to_str().expect("path");
+            let context = &module.module_llvm.context;
+            let config = cgcx.config(module.kind);
+            // NOTE: we need to set the optimization level here in order for LTO to do its job.
+            context.set_optimization_level(to_gcc_opt_level(config.opt_level));
+            context.add_command_line_option("-flto=auto");
+            context.add_command_line_option("-flto-partition=one");
+            context.compile_to_file(OutputKind::ObjectFile, path);
+            let buffer = ModuleBuffer::new(PathBuf::from(path));
+            let llmod_id = CString::new(&module.name[..]).unwrap();
+            serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
+        }
+        // Sort the modules to ensure we produce deterministic results.
+        serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1));
+
+        // We add the object files and record in `should_combine_object_files` that they
+        // should be combined into a single object file when compiling later.
+        for (bc_decoded, name) in serialized_modules {
+            let _timer = cgcx
+                .prof
+                .generic_activity_with_arg_recorder("GCC_fat_lto_link_module", |recorder| {
+                    recorder.record_arg(format!("{:?}", name))
+                });
+            info!("linking {:?}", name);
+            match bc_decoded {
+                SerializedModule::Local(ref module_buffer) => {
+                    module.module_llvm.should_combine_object_files = true;
+                    module.module_llvm.context.add_driver_option(module_buffer.0.to_str().expect("path"));
+                },
+                SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
+                SerializedModule::FromUncompressedFile(_) => unimplemented!("from uncompressed file"),
+            }
+            serialized_bitcode.push(bc_decoded);
+        }
+        save_temp_bitcode(cgcx, &module, "lto.input");
+
+        // Internalize everything below threshold to help strip out more modules and such.
+        /*unsafe {
+            let ptr = symbols_below_threshold.as_ptr();
+            llvm::LLVMRustRunRestrictionPass(
+                llmod,
+                ptr as *const *const libc::c_char,
+                symbols_below_threshold.len() as libc::size_t,
+            );*/
+            save_temp_bitcode(cgcx, &module, "lto.after-restriction");
+        //}
+    }
+
+    // NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead
+    // of now.
+    module.module_llvm.temp_dir = Some(tmp_path);
+
+    Ok(LtoModuleCodegen::Fat { module, _serialized_bitcode: serialized_bitcode })
+}
+
+pub struct ModuleBuffer(PathBuf);
+
+impl ModuleBuffer {
+    pub fn new(path: PathBuf) -> ModuleBuffer {
+        ModuleBuffer(path)
+    }
+}
+
+impl ModuleBufferMethods for ModuleBuffer {
+    fn data(&self) -> &[u8] {
+        unimplemented!("data not needed for GCC codegen");
+    }
+}
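
Unlike cg_llvm, the GCC backend cannot merge modules in memory, so `fat_lto`
compiles each in-memory module to a GIMPLE-carrying object file and later has
the driver combine the objects with a relocatable (`-Wl,-r`) link. A condensed
sketch of the per-module emission step, using the gccjit calls from the patch
(the helper itself is hypothetical, and `Aggressive` stands in for the
session's actual optimization level):

    use std::path::{Path, PathBuf};

    use gccjit::{Context, OptimizationLevel, OutputKind};

    // Sketch: emit one context as an object file that still carries LTO
    // sections so the final step can combine it with the others.
    fn emit_lto_object(context: &Context<'_>, name: &str, tmp_path: &Path) -> PathBuf {
        let path = tmp_path.join(name);
        // The optimization level must be set here for LTO to do its job.
        context.set_optimization_level(OptimizationLevel::Aggressive);
        context.add_command_line_option("-flto=auto");
        context.add_command_line_option("-flto-partition=one");
        context.compile_to_file(OutputKind::ObjectFile, path.to_str().expect("path"));
        path
    }
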
diff --git a/compiler/rustc_codegen_gcc/src/back/mod.rs b/compiler/rustc_codegen_gcc/src/back/mod.rs
index d692799d764..10187eab0d7 100644
--- a/compiler/rustc_codegen_gcc/src/back/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/back/mod.rs
@@ -1 +1,2 @@
+pub mod lto;
 pub mod write;
diff --git a/compiler/rustc_codegen_gcc/src/back/write.rs b/compiler/rustc_codegen_gcc/src/back/write.rs
index 5f54ac4ebc6..04772d7707a 100644
--- a/compiler/rustc_codegen_gcc/src/back/write.rs
+++ b/compiler/rustc_codegen_gcc/src/back/write.rs
@@ -2,27 +2,71 @@ use std::{env, fs};
 
 use gccjit::OutputKind;
 use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
-use rustc_codegen_ssa::back::write::{CodegenContext, EmitObj, ModuleConfig};
+use rustc_codegen_ssa::back::link::ensure_removed;
+use rustc_codegen_ssa::back::write::{BitcodeSection, CodegenContext, EmitObj, ModuleConfig};
 use rustc_errors::Handler;
+use rustc_fs_util::link_or_copy;
 use rustc_session::config::OutputType;
 use rustc_span::fatal_error::FatalError;
 use rustc_target::spec::SplitDebuginfo;
 
 use crate::{GccCodegenBackend, GccContext};
+use crate::errors::CopyBitcode;
 
-pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_handler: &Handler, module: ModuleCodegen<GccContext>, config: &ModuleConfig) -> Result<CompiledModule, FatalError> {
-    let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_codegen", &*module.name);
+pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, diag_handler: &Handler, module: ModuleCodegen<GccContext>, config: &ModuleConfig) -> Result<CompiledModule, FatalError> {
+    let _timer = cgcx.prof.generic_activity_with_arg("GCC_module_codegen", &*module.name);
     {
         let context = &module.module_llvm.context;
 
         let module_name = module.name.clone();
+
+        let should_combine_object_files = module.module_llvm.should_combine_object_files;
+
         let module_name = Some(&module_name[..]);
 
-        let _bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
+        // NOTE: For now, only generate object files with embedded GIMPLE when this environment
+        // variable is set, because this requires a particular setup (same gcc/lto1/lto-wrapper
+        // commit as libgccjit).
+        let fat_lto = env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1");
+
+        let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
         let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
 
-        if config.bitcode_needed() {
+        if config.bitcode_needed() && fat_lto {
+            let _timer = cgcx
+                .prof
+                .generic_activity_with_arg("GCC_module_codegen_make_bitcode", &*module.name);
+
             // TODO(antoyo)
+            /*if let Some(bitcode_filename) = bc_out.file_name() {
+                cgcx.prof.artifact_size(
+                    "llvm_bitcode",
+                    bitcode_filename.to_string_lossy(),
+                    data.len() as u64,
+                );
+            }*/
+
+            if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
+                let _timer = cgcx
+                    .prof
+                    .generic_activity_with_arg("GCC_module_codegen_emit_bitcode", &*module.name);
+                context.add_command_line_option("-flto=auto");
+                context.add_command_line_option("-flto-partition=one");
+                context.compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str"));
+            }
+
+            if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
+                let _timer = cgcx
+                    .prof
+                    .generic_activity_with_arg("GCC_module_codegen_embed_bitcode", &*module.name);
+                // TODO(antoyo): maybe we should call embed_bitcode to have the proper iOS fixes?
+                //embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
+
+                context.add_command_line_option("-flto=auto");
+                context.add_command_line_option("-flto-partition=one");
+                context.add_command_line_option("-ffat-lto-objects");
+                // TODO(antoyo): Send -plugin /usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/liblto_plugin.so to the linker (this should be done when specifying the appropriate rustc cli argument).
+                context.compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str"));
+            }
         }
 
         if config.emit_ir {
@@ -32,7 +76,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han
         if config.emit_asm {
             let _timer = cgcx
                 .prof
-                .generic_activity_with_arg("LLVM_module_codegen_emit_asm", &*module.name);
+                .generic_activity_with_arg("GCC_module_codegen_emit_asm", &*module.name);
             let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
             context.compile_to_file(OutputKind::Assembler, path.to_str().expect("path to str"));
         }
@@ -41,7 +85,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han
             EmitObj::ObjectCode(_) => {
                 let _timer = cgcx
                     .prof
-                    .generic_activity_with_arg("LLVM_module_codegen_emit_obj", &*module.name);
+                    .generic_activity_with_arg("GCC_module_codegen_emit_obj", &*module.name);
                 if env::var("CG_GCCJIT_DUMP_MODULE_NAMES").as_deref() == Ok("1") {
                     println!("Module {}", module.name);
                 }
@@ -60,11 +104,36 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han
                     context.set_debug_info(true);
                     context.dump_to_file(path, true);
                 }
-                context.compile_to_file(OutputKind::ObjectFile, obj_out.to_str().expect("path to str"));
+                if should_combine_object_files && fat_lto {
+                    context.add_command_line_option("-flto=auto");
+                    context.add_command_line_option("-flto-partition=one");
+
+                    context.add_driver_option("-Wl,-r");
+                    // NOTE: we need -nostdlib; otherwise, we get the following error:
+                    // /usr/bin/ld: cannot find -lgcc_s: No such file or directory
+                    context.add_driver_option("-nostdlib");
+                    // NOTE: without -fuse-linker-plugin, we get the following error:
+                    // lto1: internal compiler error: decompressed stream: Destination buffer is too small
+                    context.add_driver_option("-fuse-linker-plugin");
+
+                    // NOTE: this doesn't actually generate an executable. With the above flags, it combines the .o files into another .o.
+                    context.compile_to_file(OutputKind::Executable, obj_out.to_str().expect("path to str"));
+                }
+                else {
+                    context.compile_to_file(OutputKind::ObjectFile, obj_out.to_str().expect("path to str"));
+                }
             }
 
             EmitObj::Bitcode => {
-                // TODO(antoyo)
+                debug!("copying bitcode {:?} to obj {:?}", bc_out, obj_out);
+                if let Err(err) = link_or_copy(&bc_out, &obj_out) {
+                    diag_handler.emit_err(CopyBitcode { err });
+                }
+
+                if !config.emit_bc {
+                    debug!("removing_bitcode {:?}", bc_out);
+                    ensure_removed(diag_handler, &bc_out);
+                }
             }
 
             EmitObj::None => {}
@@ -82,3 +151,18 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han
 pub(crate) fn link(_cgcx: &CodegenContext<GccCodegenBackend>, _diag_handler: &Handler, mut _modules: Vec<ModuleCodegen<GccContext>>) -> Result<ModuleCodegen<GccContext>, FatalError> {
     unimplemented!();
 }
+
+pub(crate) fn save_temp_bitcode(cgcx: &CodegenContext<GccCodegenBackend>, _module: &ModuleCodegen<GccContext>, _name: &str) {
+    if !cgcx.save_temps {
+        return;
+    }
+    unimplemented!();
+    /*unsafe {
+        let ext = format!("{}.bc", name);
+        let cgu = Some(&module.name[..]);
+        let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
+        let cstr = path_to_c_string(&path);
+        let llmod = module.module_llvm.llmod();
+        llvm::LLVMWriteBitcodeToFile(llmod, cstr.as_ptr());
+    }*/
+}
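
The fat-LTO paths in `codegen` above are opt-in: they only run when
`EMBED_LTO_BITCODE=1` is set, since producing GIMPLE-carrying objects requires
gcc, lto1 and lto-wrapper from the same build as libgccjit. A sketch of the
gate, as a hypothetical helper:

    use std::env;

    // Sketch: fat LTO objects are only produced when the user opts in,
    // signalling that their toolchain setup matches libgccjit.
    fn fat_lto_enabled() -> bool {
        env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1")
    }

A build run with `EMBED_LTO_BITCODE=1` and `-Clto=fat` should exercise this
path.
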
diff --git a/compiler/rustc_codegen_gcc/src/base.rs b/compiler/rustc_codegen_gcc/src/base.rs
index dcd560b3dcd..b081e9ff2fd 100644
--- a/compiler/rustc_codegen_gcc/src/base.rs
+++ b/compiler/rustc_codegen_gcc/src/base.rs
@@ -1,3 +1,4 @@
+use std::collections::HashSet;
 use std::env;
 use std::time::Instant;
 
@@ -18,6 +19,7 @@ use rustc_codegen_ssa::traits::DebugInfoMethods;
 use rustc_session::config::DebugInfo;
 use rustc_span::Symbol;
 
+use crate::{LockedTargetInfo, gcc_util};
 use crate::GccContext;
 use crate::builder::Builder;
 use crate::context::CodegenCx;
@@ -50,6 +52,7 @@ pub fn global_linkage_to_gcc(linkage: Linkage) -> GlobalKind {
 pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType {
     match linkage {
         Linkage::External => FunctionType::Exported,
+        // TODO(antoyo): set the attribute externally_visible.
         Linkage::AvailableExternally => FunctionType::Extern,
         Linkage::LinkOnceAny => unimplemented!(),
         Linkage::LinkOnceODR => unimplemented!(),
@@ -63,7 +66,7 @@ pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType {
     }
 }
 
-pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_integers: bool) -> (ModuleCodegen<GccContext>, u64) {
+pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: LockedTargetInfo) -> (ModuleCodegen<GccContext>, u64) {
     let prof_timer = tcx.prof.generic_activity("codegen_module");
     let start_time = Instant::now();
 
@@ -71,7 +74,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
     let (module, _) = tcx.dep_graph.with_task(
         dep_node,
         tcx,
-        (cgu_name, supports_128bit_integers),
+        (cgu_name, target_info),
         module_codegen,
         Some(dep_graph::hash_result),
     );
@@ -82,38 +85,28 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
     // the time we needed for codegenning it.
     let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;
 
-    fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, supports_128bit_integers): (Symbol, bool)) -> ModuleCodegen<GccContext> {
+    fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, LockedTargetInfo)) -> ModuleCodegen<GccContext> {
         let cgu = tcx.codegen_unit(cgu_name);
         // Instantiate monomorphizations without filling out definitions yet...
-        //let llvm_module = ModuleLlvm::new(tcx, &cgu_name.as_str());
         let context = Context::default();
 
         context.add_command_line_option("-fexceptions");
         context.add_driver_option("-fexceptions");
 
+        let disabled_features: HashSet<_> = tcx.sess.opts.cg.target_feature.split(',')
+            .filter(|feature| feature.starts_with('-'))
+            .map(|string| &string[1..])
+            .collect();
+
         // TODO(antoyo): only set on x86 platforms.
         context.add_command_line_option("-masm=intel");
-        // TODO(antoyo): only add the following cli argument if the feature is supported.
-        context.add_command_line_option("-msse2");
-        context.add_command_line_option("-mavx2");
-        // FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
-        // Only add if the CPU supports it.
-        context.add_command_line_option("-msha");
-        context.add_command_line_option("-mpclmul");
-        context.add_command_line_option("-mfma");
-        context.add_command_line_option("-mfma4");
-        context.add_command_line_option("-m64");
-        context.add_command_line_option("-mbmi");
-        context.add_command_line_option("-mgfni");
-        //context.add_command_line_option("-mavxvnni"); // The CI doesn't support this option.
-        context.add_command_line_option("-mf16c");
-        context.add_command_line_option("-maes");
-        context.add_command_line_option("-mxsavec");
-        context.add_command_line_option("-mbmi2");
-        context.add_command_line_option("-mrtm");
-        context.add_command_line_option("-mvaes");
-        context.add_command_line_option("-mvpclmulqdq");
-        context.add_command_line_option("-mavx");
+
+        if !disabled_features.contains("avx") {
+            // NOTE: we always enable AVX because the equivalent of llvm.x86.sse2.cmp.pd in GCC for
+            // SSE2 is multiple builtins, so we use the AVX __builtin_ia32_cmppd instead.
+            // FIXME(antoyo): use the proper builtins for llvm.x86.sse2.cmp.pd and similar.
+            context.add_command_line_option("-mavx");
+        }
 
         for arg in &tcx.sess.opts.cg.llvm_args {
             context.add_command_line_option(arg);
@@ -127,6 +120,16 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
         // NOTE: Rust relies on LLVM doing wrapping on overflow.
         context.add_command_line_option("-fwrapv");
 
+        if tcx.sess.relocation_model() == rustc_target::spec::RelocModel::Static {
+            context.add_command_line_option("-mcmodel=kernel");
+            context.add_command_line_option("-fno-pie");
+        }
+
+        let target_cpu = gcc_util::target_cpu(tcx.sess);
+        if target_cpu != "generic" {
+            context.add_command_line_option(&format!("-march={}", target_cpu));
+        }
+
         if tcx.sess.opts.unstable_opts.function_sections.unwrap_or(tcx.sess.target.function_sections) {
             context.add_command_line_option("-ffunction-sections");
             context.add_command_line_option("-fdata-sections");
@@ -135,8 +138,14 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
         if env::var("CG_GCCJIT_DUMP_RTL").as_deref() == Ok("1") {
             context.add_command_line_option("-fdump-rtl-vregs");
         }
+        if env::var("CG_GCCJIT_DUMP_RTL_ALL").as_deref() == Ok("1") {
+            context.add_command_line_option("-fdump-rtl-all");
+        }
         if env::var("CG_GCCJIT_DUMP_TREE_ALL").as_deref() == Ok("1") {
-            context.add_command_line_option("-fdump-tree-all");
+            context.add_command_line_option("-fdump-tree-all-eh");
+        }
+        if env::var("CG_GCCJIT_DUMP_IPA_ALL").as_deref() == Ok("1") {
+            context.add_command_line_option("-fdump-ipa-all-eh");
         }
         if env::var("CG_GCCJIT_DUMP_CODE").as_deref() == Ok("1") {
             context.set_dump_code_on_compile(true);
@@ -152,15 +161,19 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
             context.set_keep_intermediates(true);
         }
 
+        if env::var("CG_GCCJIT_VERBOSE").as_deref() == Ok("1") {
+            context.add_driver_option("-v");
+        }
+
         // NOTE: The codegen generates unreachable blocks.
         context.set_allow_unreachable_blocks(true);
 
         {
-            let cx = CodegenCx::new(&context, cgu, tcx, supports_128bit_integers);
+            let cx = CodegenCx::new(&context, cgu, tcx, target_info.supports_128bit_int());
 
             let mono_items = cgu.items_in_deterministic_order(tcx);
-            for &(mono_item, (linkage, visibility)) in &mono_items {
-                mono_item.predefine::<Builder<'_, '_, '_>>(&cx, linkage, visibility);
+            for &(mono_item, data) in &mono_items {
+                mono_item.predefine::<Builder<'_, '_, '_>>(&cx, data.linkage, data.visibility);
             }
 
             // ... and now that we have everything pre-defined, fill out those definitions.
@@ -181,7 +194,9 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
         ModuleCodegen {
             name: cgu_name.to_string(),
             module_llvm: GccContext {
-                context
+                context,
+                should_combine_object_files: false,
+                temp_dir: None,
             },
             kind: ModuleKind::Regular,
         }
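
A note on the `-Ctarget-feature` handling added above: the flag value is a comma-separated list of `+feature`/`-feature` entries, and only the explicitly disabled ones are collected into the set consulted before enabling AVX. A minimal standalone sketch of that parsing (the helper name is illustrative, not part of the patch):

    use std::collections::HashSet;

    // Entries starting with '-' are disabled features; the leading sign is stripped.
    fn disabled_features(target_feature: &str) -> HashSet<&str> {
        target_feature
            .split(',')
            .filter(|feature| feature.starts_with('-'))
            .map(|feature| &feature[1..])
            .collect()
    }

    fn main() {
        let disabled = disabled_features("+sse4.2,-avx,-bmi2");
        assert!(disabled.contains("avx") && disabled.contains("bmi2"));
        assert!(!disabled.contains("sse4.2"));
    }
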
diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
index a3c8142bea2..b7841808934 100644
--- a/compiler/rustc_codegen_gcc/src/builder.rs
+++ b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -27,7 +27,6 @@ use rustc_codegen_ssa::traits::{
     BaseTypeMethods,
     BuilderMethods,
     ConstMethods,
-    DerivedTypeMethods,
     LayoutTypeMethods,
     HasCodegen,
     OverflowOp,
@@ -35,6 +34,7 @@ use rustc_codegen_ssa::traits::{
 };
 use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::bug;
+use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::{ParamEnv, Ty, TyCtxt};
 use rustc_middle::ty::layout::{FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasParamEnv, HasTyCtxt, LayoutError, LayoutOfHelpers, TyAndLayout};
 use rustc_span::Span;
@@ -180,6 +180,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             })
             .collect();
 
+        debug_assert_eq!(casted_args.len(), args.len());
+
         Cow::Owned(casted_args)
     }
 
@@ -206,7 +208,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
         let func_name = format!("{:?}", func_ptr);
 
-        let casted_args: Vec<_> = param_types
+        let mut casted_args: Vec<_> = param_types
             .into_iter()
             .zip(args.iter())
             .enumerate()
@@ -236,20 +238,18 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             })
             .collect();
 
+        // NOTE: pass any remaining arguments through unchanged to account for variadic functions.
+        for i in casted_args.len()..args.len() {
+            casted_args.push(args[i]);
+        }
+
         Cow::Owned(casted_args)
     }
 
     fn check_store(&mut self, val: RValue<'gcc>, ptr: RValue<'gcc>) -> RValue<'gcc> {
-        let dest_ptr_ty = self.cx.val_ty(ptr).make_pointer(); // TODO(antoyo): make sure make_pointer() is okay here.
         let stored_ty = self.cx.val_ty(val);
         let stored_ptr_ty = self.cx.type_ptr_to(stored_ty);
-
-        if dest_ptr_ty == stored_ptr_ty {
-            ptr
-        }
-        else {
-            self.bitcast(ptr, stored_ptr_ty)
-        }
+        self.bitcast(ptr, stored_ptr_ty)
     }
 
     pub fn current_func(&self) -> Function<'gcc> {
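
A note on the variadic handling above: the cast loop only produces one value per declared parameter, so any call-site arguments past that count are appended unchanged. A hypothetical standalone model of the padding loop:

    // `casted` holds one value per declared parameter; extra variadic
    // arguments are passed through as-is.
    fn pad_variadic_args(casted: &mut Vec<i64>, args: &[i64]) {
        for i in casted.len()..args.len() {
            casted.push(args[i]);
        }
    }

    fn main() {
        let mut casted = vec![10, 20]; // two declared parameters, already casted
        let args = [1, 2, 3, 4];       // the call site passed four arguments
        pad_variadic_args(&mut casted, &args);
        assert_eq!(casted, vec![10, 20, 3, 4]);
    }
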
@@ -279,8 +279,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         }
     }
 
-    fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
-        let gcc_func = func_ptr.get_type().dyncast_function_ptr_type().expect("function ptr");
+    fn function_ptr_call(&mut self, typ: Type<'gcc>, mut func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
+        let gcc_func =
+            match func_ptr.get_type().dyncast_function_ptr_type() {
+                Some(func) => func,
+                None => {
+                    // NOTE: due to opaque pointers now being used, we need to cast here.
+                    let new_func_type = typ.dyncast_function_ptr_type().expect("function ptr");
+                    func_ptr = self.context.new_cast(None, func_ptr, typ);
+                    new_func_type
+                },
+            };
         let func_name = format!("{:?}", func_ptr);
         let previous_arg_count = args.len();
         let orig_args = args;
@@ -423,16 +432,17 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         self.llbb().end_with_void_return(None)
     }
 
-    fn ret(&mut self, value: RValue<'gcc>) {
-        let value =
-            if self.structs_as_pointer.borrow().contains(&value) {
-                // NOTE: hack to workaround a limitation of the rustc API: see comment on
-                // CodegenCx.structs_as_pointer
-                value.dereference(None).to_rvalue()
-            }
-            else {
-                value
-            };
+    fn ret(&mut self, mut value: RValue<'gcc>) {
+        if self.structs_as_pointer.borrow().contains(&value) {
+            // NOTE: hack to work around a limitation of the rustc API: see comment on
+            // CodegenCx.structs_as_pointer
+            value = value.dereference(None).to_rvalue();
+        }
+        let expected_return_type = self.current_func().get_return_type();
+        if !expected_return_type.is_compatible_with(value.get_type()) {
+            // NOTE: due to opaque pointers now being used, we need to cast here.
+            value = self.context.new_cast(None, value, expected_return_type);
+        }
         self.llbb().end_with_return(None, value);
     }
 
@@ -455,12 +465,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     #[cfg(feature="master")]
-    fn invoke(&mut self, typ: Type<'gcc>, _fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> {
+    fn invoke(&mut self, typ: Type<'gcc>, fn_attrs: Option<&CodegenFnAttrs>, _fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> {
         let try_block = self.current_func().new_block("try");
 
         let current_block = self.block.clone();
         self.block = try_block;
-        let call = self.call(typ, None, func, args, None); // TODO(antoyo): use funclet here?
+        let call = self.call(typ, fn_attrs, None, func, args, None); // TODO(antoyo): use funclet here?
         self.block = current_block;
 
         let return_value = self.current_func()
@@ -483,8 +493,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     #[cfg(not(feature="master"))]
-    fn invoke(&mut self, typ: Type<'gcc>, fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> {
-        let call_site = self.call(typ, None, func, args, None);
+    fn invoke(&mut self, typ: Type<'gcc>, fn_attrs: Option<&CodegenFnAttrs>, fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> {
+        let call_site = self.call(typ, fn_attrs, None, func, args, None);
         let condition = self.context.new_rvalue_from_int(self.bool_type, 1);
         self.llbb().end_with_conditional(None, condition, then, catch);
         if let Some(_fn_abi) = fn_abi {
@@ -646,7 +656,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn unchecked_sadd(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
-        a + b
+        self.gcc_add(a, b)
     }
 
     fn unchecked_uadd(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
@@ -654,7 +664,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn unchecked_ssub(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
-        a - b
+        self.gcc_sub(a, b)
     }
 
     fn unchecked_usub(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
@@ -663,11 +673,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn unchecked_smul(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
-        a * b
+        self.gcc_mul(a, b)
     }
 
     fn unchecked_umul(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
-        a * b
+        self.gcc_mul(a, b)
     }
 
     fn fadd_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
@@ -718,17 +728,25 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         unimplemented!();
     }
 
-    fn load(&mut self, pointee_ty: Type<'gcc>, ptr: RValue<'gcc>, _align: Align) -> RValue<'gcc> {
+    fn load(&mut self, pointee_ty: Type<'gcc>, ptr: RValue<'gcc>, align: Align) -> RValue<'gcc> {
         let block = self.llbb();
         let function = block.get_function();
         // NOTE: instead of returning the dereference here, we have to assign it to a variable in
         // the current basic block. Otherwise, it could be used in another basic block, causing a
         // dereference after a drop, for instance.
-        // TODO(antoyo): handle align of the load instruction.
-        let ptr = self.context.new_cast(None, ptr, pointee_ty.make_pointer());
+        // FIXME(antoyo): this checks that we don't call get_aligned() a second time on a type.
+        // Ideally, we shouldn't need to do this check.
+        let aligned_type =
+            if pointee_ty == self.cx.u128_type || pointee_ty == self.cx.i128_type {
+                pointee_ty
+            }
+            else {
+                pointee_ty.get_aligned(align.bytes())
+            };
+        let ptr = self.context.new_cast(None, ptr, aligned_type.make_pointer());
         let deref = ptr.dereference(None).to_rvalue();
         unsafe { RETURN_VALUE_COUNT += 1 };
-        let loaded_value = function.new_local(None, pointee_ty, &format!("loadedValue{}", unsafe { RETURN_VALUE_COUNT }));
+        let loaded_value = function.new_local(None, aligned_type, &format!("loadedValue{}", unsafe { RETURN_VALUE_COUNT }));
         block.add_assignment(None, loaded_value, deref);
         loaded_value.to_rvalue()
     }
@@ -757,7 +775,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         assert_eq!(place.llextra.is_some(), place.layout.is_unsized());
 
         if place.layout.is_zst() {
-            return OperandRef::new_zst(self, place.layout);
+            return OperandRef::zero_sized(place.layout);
         }
 
         fn scalar_load_metadata<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, load: RValue<'gcc>, scalar: &abi::Scalar) {
@@ -796,7 +814,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
                 let mut load = |i, scalar: &abi::Scalar, align| {
                     let llptr = self.struct_gep(pair_type, place.llval, i as u64);
-                    let llty = place.layout.scalar_pair_element_gcc_type(self, i, false);
+                    let llty = place.layout.scalar_pair_element_gcc_type(self, i);
                     let load = self.load(llty, llptr, align);
                     scalar_load_metadata(self, load, scalar);
                     if scalar.is_bool() { self.trunc(load, self.type_i1()) } else { load }
@@ -891,7 +909,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
             .add_eval(None, self.context.new_call(None, atomic_store, &[ptr, value, ordering]));
     }
 
-    fn gep(&mut self, _typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
+    fn gep(&mut self, typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
+        // NOTE: due to opaque pointers now being used, we need to cast here.
+        let ptr = self.context.new_cast(None, ptr, typ.make_pointer());
         let ptr_type = ptr.get_type();
         let mut pointee_type = ptr.get_type();
         // NOTE: we cannot use array indexing here like in inbounds_gep because array indexing is
@@ -902,13 +922,21 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         // require dereferencing the pointer.
         for index in indices {
             pointee_type = pointee_type.get_pointee().expect("pointee type");
+            #[cfg(feature="master")]
+            let pointee_size = {
+                let size = self.cx.context.new_sizeof(pointee_type);
+                self.context.new_cast(None, size, index.get_type())
+            };
+            #[cfg(not(feature="master"))]
             let pointee_size = self.context.new_rvalue_from_int(index.get_type(), pointee_type.get_size() as i32);
             result = result + self.gcc_int_cast(*index * pointee_size, self.sizet_type);
         }
         self.context.new_bitcast(None, result, ptr_type)
     }
 
-    fn inbounds_gep(&mut self, _typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
+    fn inbounds_gep(&mut self, typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
+        // NOTE: due to opaque pointers now being used, we need to cast here.
+        let ptr = self.context.new_cast(None, ptr, typ.make_pointer());
         // NOTE: array indexing is always considered in bounds in GCC (TODO(antoyo): to be verified).
         let mut indices = indices.into_iter();
         let index = indices.next().expect("first index in inbounds_gep");
@@ -937,6 +965,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
             element.get_address(None)
         }
         else if let Some(struct_type) = value_type.is_struct() {
+            // NOTE: due to opaque pointers now being used, we need to bitcast here.
+            let ptr = self.bitcast_if_needed(ptr, value_type.make_pointer());
             ptr.dereference_field(None, struct_type.get_field(idx as i32)).get_address(None)
         }
         else {
@@ -1226,6 +1256,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         (value1, value2)
     }
 
+    fn filter_landing_pad(&mut self, pers_fn: RValue<'gcc>) -> (RValue<'gcc>, RValue<'gcc>) {
+        // TODO(antoyo): generate the correct landing pad
+        self.cleanup_landing_pad(pers_fn)
+    }
+
     #[cfg(feature="master")]
     fn resume(&mut self, exn0: RValue<'gcc>, _exn1: RValue<'gcc>) {
         let exn_type = exn0.get_type();
@@ -1350,7 +1385,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
     fn call(
         &mut self,
-        _typ: Type<'gcc>,
+        typ: Type<'gcc>,
+        _fn_attrs: Option<&CodegenFnAttrs>,
         fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>,
         func: RValue<'gcc>,
         args: &[RValue<'gcc>],
@@ -1363,7 +1399,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         }
         else {
             // If it's not a function that was defined, it's a function pointer.
-            self.function_ptr_call(func, args, funclet)
+            self.function_ptr_call(typ, func, args, funclet)
         };
         if let Some(_fn_abi) = fn_abi {
             // TODO(bjorn3): Apply function attributes
@@ -1385,7 +1421,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         self.cx
     }
 
-    fn do_not_inline(&mut self, _llret: RValue<'gcc>) {
+    fn apply_attrs_to_cleanup_callsite(&mut self, _llret: RValue<'gcc>) {
         // FIXME(bjorn3): implement
     }
 
@@ -1836,7 +1872,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
         #[cfg(feature="master")]
         let (cond, element_type) = {
-            let then_val_vector_type = then_val.get_type().dyncast_vector().expect("vector type");
+            // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+            let then_val_vector_type = then_val.get_type().unqualified().dyncast_vector().expect("vector type");
             let then_val_element_type = then_val_vector_type.get_element_type();
             let then_val_element_size = then_val_element_type.get_size();
 
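
On the `gep` hunk above: libgccjit has no direct GEP equivalent, so the indices are flattened into byte arithmetic, each one advancing the address by `index * size_of(pointee at that depth)`. A simplified standalone model of the offset accumulation (helper and sizes are illustrative only):

    // Mirrors `result = result + index * pointee_size` from the loop above.
    fn gep_offset(pointee_sizes: &[usize], indices: &[usize]) -> usize {
        indices
            .iter()
            .zip(pointee_sizes)
            .map(|(&index, &size)| index * size)
            .sum()
    }

    fn main() {
        // Index 2 over 16-byte elements, then index 3 over 4-byte fields:
        assert_eq!(gep_offset(&[16, 4], &[2, 3]), 2 * 16 + 3 * 4);
    }
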
diff --git a/compiler/rustc_codegen_gcc/src/callee.rs b/compiler/rustc_codegen_gcc/src/callee.rs
index ba1e8656208..9fc77627b1b 100644
--- a/compiler/rustc_codegen_gcc/src/callee.rs
+++ b/compiler/rustc_codegen_gcc/src/callee.rs
@@ -17,8 +17,8 @@ use crate::context::CodegenCx;
 pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>) -> Function<'gcc> {
     let tcx = cx.tcx();
 
-    assert!(!instance.substs.needs_infer());
-    assert!(!instance.substs.has_escaping_bound_vars());
+    assert!(!instance.args.has_infer());
+    assert!(!instance.args.has_escaping_bound_vars());
 
     let sym = tcx.symbol_name(instance).name;
 
@@ -100,7 +100,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>)
             // whether we are sharing generics or not. The important thing here is
             // that the visibility we apply to the declaration is the same one that
             // has been applied to the definition (wherever that definition may be).
-            let is_generic = instance.substs.non_erasable_generics().next().is_some();
+            let is_generic = instance.args.non_erasable_generics(tcx, instance.def_id()).next().is_some();
 
             if is_generic {
                 // This is a monomorphization. Its expected visibility depends
diff --git a/compiler/rustc_codegen_gcc/src/common.rs b/compiler/rustc_codegen_gcc/src/common.rs
index ac04b61a306..5f54cb16d8e 100644
--- a/compiler/rustc_codegen_gcc/src/common.rs
+++ b/compiler/rustc_codegen_gcc/src/common.rs
@@ -1,23 +1,25 @@
 use gccjit::LValue;
 use gccjit::{RValue, Type, ToRValue};
-use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::{
     BaseTypeMethods,
     ConstMethods,
-    DerivedTypeMethods,
     MiscMethods,
     StaticMethods,
 };
 use rustc_middle::mir::Mutability;
-use rustc_middle::ty::layout::{TyAndLayout, LayoutOf};
+use rustc_middle::ty::layout::{LayoutOf};
 use rustc_middle::mir::interpret::{ConstAllocation, GlobalAlloc, Scalar};
-use rustc_target::abi::{self, HasDataLayout, Pointer, Size};
+use rustc_target::abi::{self, HasDataLayout, Pointer};
 
 use crate::consts::const_alloc_to_gcc;
 use crate::context::CodegenCx;
 use crate::type_of::LayoutGccExt;
 
 impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
+    pub fn const_ptrcast(&self, val: RValue<'gcc>, ty: Type<'gcc>) -> RValue<'gcc> {
+        self.context.new_cast(None, val, ty)
+    }
+
     pub fn const_bytes(&self, bytes: &[u8]) -> RValue<'gcc> {
         bytes_in_context(self, bytes)
     }
@@ -110,6 +112,10 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         self.const_uint(self.type_u64(), i)
     }
 
+    fn const_u128(&self, i: u128) -> RValue<'gcc> {
+        self.const_uint_big(self.type_u128(), i)
+    }
+
     fn const_usize(&self, i: u64) -> RValue<'gcc> {
         let bit_size = self.data_layout().pointer_size.bits();
         if bit_size < 64 {
@@ -240,27 +246,21 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         const_alloc_to_gcc(self, alloc)
     }
 
-    fn from_const_alloc(&self, layout: TyAndLayout<'tcx>, alloc: ConstAllocation<'tcx>, offset: Size) -> PlaceRef<'tcx, RValue<'gcc>> {
-        assert_eq!(alloc.inner().align, layout.align.abi);
-        let ty = self.type_ptr_to(layout.gcc_type(self));
-        let value =
-            if layout.size == Size::ZERO {
-                let value = self.const_usize(alloc.inner().align.bytes());
-                self.const_bitcast(value, ty)
+    fn const_bitcast(&self, value: RValue<'gcc>, typ: Type<'gcc>) -> RValue<'gcc> {
+        if value.get_type() == self.bool_type.make_pointer() {
+            if let Some(pointee) = typ.get_pointee() {
+                if pointee.dyncast_vector().is_some() {
+                    panic!()
+                }
             }
-            else {
-                let init = const_alloc_to_gcc(self, alloc);
-                let base_addr = self.static_addr_of(init, alloc.inner().align, None);
-
-                let array = self.const_bitcast(base_addr, self.type_i8p());
-                let value = self.context.new_array_access(None, array, self.const_usize(offset.bytes())).get_address(None);
-                self.const_bitcast(value, ty)
-            };
-        PlaceRef::new_sized(value, layout)
+        }
+        // NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some
+        // SIMD builtins require a constant value.
+        self.bitcast_if_needed(value, typ)
     }
 
-    fn const_ptrcast(&self, val: RValue<'gcc>, ty: Type<'gcc>) -> RValue<'gcc> {
-        self.context.new_cast(None, val, ty)
+    fn const_ptr_byte_offset(&self, base_addr: Self::Value, offset: abi::Size) -> Self::Value {
+        self.context.new_array_access(None, base_addr, self.const_usize(offset.bytes())).get_address(None)
     }
 }
 
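
On the new `const_ptr_byte_offset` above: taking the address of element `offset` of the base pointer viewed as a byte array is just a byte offset from that pointer. A hedged Rust analogue:

    // Analogue of `&base_addr[offset]` computed through gccjit's array access.
    fn const_ptr_byte_offset(base_addr: *const u8, offset: usize) -> *const u8 {
        base_addr.wrapping_add(offset)
    }

    fn main() {
        let bytes = [10u8, 20, 30, 40];
        let ptr = const_ptr_byte_offset(bytes.as_ptr(), 2);
        assert_eq!(unsafe { *ptr }, 30);
    }
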
diff --git a/compiler/rustc_codegen_gcc/src/consts.rs b/compiler/rustc_codegen_gcc/src/consts.rs
index 792ab8f890d..d8a1fd315c0 100644
--- a/compiler/rustc_codegen_gcc/src/consts.rs
+++ b/compiler/rustc_codegen_gcc/src/consts.rs
@@ -1,6 +1,6 @@
 #[cfg(feature = "master")]
-use gccjit::FnAttribute;
-use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue, Type};
+use gccjit::{FnAttribute, VarAttribute, Visibility};
+use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue};
 use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, DerivedTypeMethods, StaticMethods};
 use rustc_middle::span_bug;
 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
@@ -16,21 +16,6 @@ use crate::context::CodegenCx;
 use crate::errors::InvalidMinimumAlignment;
 use crate::type_of::LayoutGccExt;
 
-impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
-    pub fn const_bitcast(&self, value: RValue<'gcc>, typ: Type<'gcc>) -> RValue<'gcc> {
-        if value.get_type() == self.bool_type.make_pointer() {
-            if let Some(pointee) = typ.get_pointee() {
-                if pointee.dyncast_vector().is_some() {
-                    panic!()
-                }
-            }
-        }
-        // NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some
-        // SIMD builtins require a constant value.
-        self.bitcast_if_needed(value, typ)
-    }
-}
-
 fn set_global_alignment<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, gv: LValue<'gcc>, mut align: Align) {
     // The target may require greater alignment for globals than the type does.
     // Note: GCC and Clang also allow `__attribute__((aligned))` on variables,
@@ -39,7 +24,7 @@ fn set_global_alignment<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, gv: LValue<'gcc>
         match Align::from_bits(min) {
             Ok(min) => align = align.max(min),
             Err(err) => {
-                cx.sess().emit_err(InvalidMinimumAlignment { err });
+                cx.sess().emit_err(InvalidMinimumAlignment { err: err.to_string() });
             }
         }
     }
@@ -249,7 +234,8 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             );
 
             if !self.tcx.is_reachable_non_generic(def_id) {
-                // TODO(antoyo): set visibility.
+                #[cfg(feature = "master")]
+                global.add_attribute(VarAttribute::Visibility(Visibility::Hidden));
             }
 
             global
diff --git a/compiler/rustc_codegen_gcc/src/context.rs b/compiler/rustc_codegen_gcc/src/context.rs
index 661681bdb50..dcebd92a61c 100644
--- a/compiler/rustc_codegen_gcc/src/context.rs
+++ b/compiler/rustc_codegen_gcc/src/context.rs
@@ -7,6 +7,7 @@ use rustc_codegen_ssa::traits::{
     BaseTypeMethods,
     MiscMethods,
 };
+use rustc_codegen_ssa::errors as ssa_errors;
 use rustc_data_structures::base_n;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_middle::span_bug;
@@ -476,10 +477,10 @@ impl<'gcc, 'tcx> LayoutOfHelpers<'tcx> for CodegenCx<'gcc, 'tcx> {
 
     #[inline]
     fn handle_layout_err(&self, err: LayoutError<'tcx>, span: Span, ty: Ty<'tcx>) -> ! {
-        if let LayoutError::SizeOverflow(_) = err {
-            self.sess().emit_fatal(respan(span, err))
+        if let LayoutError::SizeOverflow(_) | LayoutError::ReferencesError(_) = err {
+            self.sess().emit_fatal(respan(span, err.into_diagnostic()))
         } else {
-            span_bug!(span, "failed to get layout for `{}`: {}", ty, err)
+            self.tcx.sess.emit_fatal(ssa_errors::FailedToGetLayout { span, ty, err })
         }
     }
 }
@@ -499,21 +500,12 @@ impl<'gcc, 'tcx> FnAbiOfHelpers<'tcx> for CodegenCx<'gcc, 'tcx> {
         } else {
             match fn_abi_request {
                 FnAbiRequest::OfFnPtr { sig, extra_args } => {
-                    span_bug!(
-                        span,
-                        "`fn_abi_of_fn_ptr({}, {:?})` failed: {}",
-                        sig,
-                        extra_args,
-                        err
-                    );
+                    span_bug!(span, "`fn_abi_of_fn_ptr({sig}, {extra_args:?})` failed: {err:?}");
                 }
                 FnAbiRequest::OfInstance { instance, extra_args } => {
                     span_bug!(
                         span,
-                        "`fn_abi_of_instance({}, {:?})` failed: {}",
-                        instance,
-                        extra_args,
-                        err
+                        "`fn_abi_of_instance({instance}, {extra_args:?})` failed: {err:?}"
                     );
                 }
             }
diff --git a/compiler/rustc_codegen_gcc/src/coverageinfo.rs b/compiler/rustc_codegen_gcc/src/coverageinfo.rs
index 872fc2472e2..849e9886ef3 100644
--- a/compiler/rustc_codegen_gcc/src/coverageinfo.rs
+++ b/compiler/rustc_codegen_gcc/src/coverageinfo.rs
@@ -1,69 +1,11 @@
-use gccjit::RValue;
-use rustc_codegen_ssa::traits::{CoverageInfoBuilderMethods, CoverageInfoMethods};
-use rustc_hir::def_id::DefId;
-use rustc_middle::mir::coverage::{
-    CodeRegion,
-    CounterValueReference,
-    ExpressionOperandId,
-    InjectedExpressionId,
-    Op,
-};
+use rustc_codegen_ssa::traits::CoverageInfoBuilderMethods;
+use rustc_middle::mir::Coverage;
 use rustc_middle::ty::Instance;
 
 use crate::builder::Builder;
-use crate::context::CodegenCx;
 
 impl<'a, 'gcc, 'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
-    fn set_function_source_hash(
-        &mut self,
-        _instance: Instance<'tcx>,
-        _function_source_hash: u64,
-    ) -> bool {
-        unimplemented!();
-    }
-
-    fn add_coverage_counter(&mut self, _instance: Instance<'tcx>, _id: CounterValueReference, _region: CodeRegion) -> bool {
-        // TODO(antoyo)
-        false
-    }
-
-    fn add_coverage_counter_expression(&mut self, _instance: Instance<'tcx>, _id: InjectedExpressionId, _lhs: ExpressionOperandId, _op: Op, _rhs: ExpressionOperandId, _region: Option<CodeRegion>) -> bool {
-        // TODO(antoyo)
-        false
-    }
-
-    fn add_coverage_unreachable(&mut self, _instance: Instance<'tcx>, _region: CodeRegion) -> bool {
+    fn add_coverage(&mut self, _instance: Instance<'tcx>, _coverage: &Coverage) {
         // TODO(antoyo)
-        false
-    }
-}
-
-impl<'gcc, 'tcx> CoverageInfoMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
-    fn coverageinfo_finalize(&self) {
-        // TODO(antoyo)
-    }
-
-    fn get_pgo_func_name_var(&self, _instance: Instance<'tcx>) -> RValue<'gcc> {
-        unimplemented!();
-    }
-
-    /// Functions with MIR-based coverage are normally codegenned _only_ if
-    /// called. LLVM coverage tools typically expect every function to be
-    /// defined (even if unused), with at least one call to LLVM intrinsic
-    /// `instrprof.increment`.
-    ///
-    /// Codegen a small function that will never be called, with one counter
-    /// that will never be incremented.
-    ///
-    /// For used/called functions, the coverageinfo was already added to the
-    /// `function_coverage_map` (keyed by function `Instance`) during codegen.
-    /// But in this case, since the unused function was _not_ previously
-    /// codegenned, collect the coverage `CodeRegion`s from the MIR and add
-    /// them. The first `CodeRegion` is used to add a single counter, with the
-    /// same counter ID used in the injected `instrprof.increment` intrinsic
-    /// call. Since the function is never called, all other `CodeRegion`s can be
-    /// added as `unreachable_region`s.
-    fn define_unused_fn(&self, _def_id: DefId) {
-        unimplemented!();
     }
 }
diff --git a/compiler/rustc_codegen_gcc/src/debuginfo.rs b/compiler/rustc_codegen_gcc/src/debuginfo.rs
index a81585d4128..d1bfd833cd8 100644
--- a/compiler/rustc_codegen_gcc/src/debuginfo.rs
+++ b/compiler/rustc_codegen_gcc/src/debuginfo.rs
@@ -55,7 +55,7 @@ impl<'gcc, 'tcx> DebugInfoMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         _fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
         _llfn: RValue<'gcc>,
         _mir: &mir::Body<'tcx>,
-    ) -> Option<FunctionDebugContext<Self::DIScope, Self::DILocation>> {
+    ) -> Option<FunctionDebugContext<'tcx, Self::DIScope, Self::DILocation>> {
         // TODO(antoyo)
         None
     }
diff --git a/compiler/rustc_codegen_gcc/src/declare.rs b/compiler/rustc_codegen_gcc/src/declare.rs
index 4748e7e4be2..e673d0af4c7 100644
--- a/compiler/rustc_codegen_gcc/src/declare.rs
+++ b/compiler/rustc_codegen_gcc/src/declare.rs
@@ -1,4 +1,6 @@
 use gccjit::{Function, FunctionType, GlobalKind, LValue, RValue, Type};
+#[cfg(feature="master")]
+use gccjit::{FnAttribute, ToRValue};
 use rustc_codegen_ssa::traits::BaseTypeMethods;
 use rustc_middle::ty::Ty;
 use rustc_span::Symbol;
@@ -114,6 +116,44 @@ fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*ll
                 .collect();
             let func = cx.context.new_function(None, cx.linkage.get(), return_type, &params, mangle_name(name), variadic);
             cx.functions.borrow_mut().insert(name.to_string(), func);
+
+            #[cfg(feature="master")]
+            if name == "rust_eh_personality" {
+                // NOTE: GCC will sometimes change the personality function set on a function from
+                // rust_eh_personality to __gcc_personality_v0 as an optimization.
+                // As such, we need to create a weak alias from __gcc_personality_v0 to
+                // rust_eh_personality in order to avoid a linker error.
+                // This needs to be weak in order to still allow using the standard
+                // __gcc_personality_v0 when linking to it.
+                // Since aliases don't work (maybe because of a bug in LTO partitioning?), we
+                // create a wrapper function that calls rust_eh_personality.
+
+                let params: Vec<_> = param_types.into_iter().enumerate()
+                    .map(|(index, param)| cx.context.new_parameter(None, *param, &format!("param{}", index))) // TODO(antoyo): set name.
+                    .collect();
+                let gcc_func = cx.context.new_function(None, FunctionType::Exported, return_type, &params, "__gcc_personality_v0", variadic);
+
+                // We need a normal extern function for the crates that access rust_eh_personality
+                // without defining it, otherwise we'll get a compiler error.
+                //
+                // For the crate defining it, that needs to be a weak alias instead.
+                gcc_func.add_attribute(FnAttribute::Weak);
+
+                let block = gcc_func.new_block("start");
+                let mut args = vec![];
+                for param in &params {
+                    args.push(param.to_rvalue());
+                }
+                let call = cx.context.new_call(None, func, &args);
+                if return_type == cx.type_void() {
+                    block.add_eval(None, call);
+                    block.end_with_void_return(None);
+                }
+                else {
+                    block.end_with_return(None, call);
+                }
+            }
+
             func
         };
 
@@ -132,7 +172,7 @@ fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*ll
 pub fn mangle_name(name: &str) -> String {
     name.replace(|char: char| {
         if !char.is_alphanumeric() && char != '_' {
-            debug_assert!("$.".contains(char), "Unsupported char in function name: {}", char);
+            debug_assert!("$.*".contains(char), "Unsupported char in function name {}: {}", name, char);
             true
         }
         else {
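
For the `mangle_name` hunk above (the replacement string is outside the hunk): assuming offending characters are replaced with `_`, the behaviour is roughly the following sketch, where only `$`, `.` and now `*` are expected and anything else trips the debug assertion:

    fn mangle_name(name: &str) -> String {
        name.replace(
            |c: char| {
                if !c.is_alphanumeric() && c != '_' {
                    debug_assert!("$.*".contains(c), "Unsupported char in function name {}: {}", name, c);
                    true
                } else {
                    false
                }
            },
            "_", // assumed replacement character; not shown in the hunk
        )
    }

    fn main() {
        assert_eq!(mangle_name("drop_in_place$u20$foo"), "drop_in_place_u20_foo");
    }
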
diff --git a/compiler/rustc_codegen_gcc/src/errors.rs b/compiler/rustc_codegen_gcc/src/errors.rs
index 9305bd1e043..4bf3b71f503 100644
--- a/compiler/rustc_codegen_gcc/src/errors.rs
+++ b/compiler/rustc_codegen_gcc/src/errors.rs
@@ -1,238 +1,125 @@
-use rustc_errors::{DiagnosticArgValue, IntoDiagnosticArg};
-use rustc_macros::Diagnostic;
-use rustc_middle::ty::Ty;
-use rustc_span::{Span, Symbol};
+use rustc_errors::{
+    DiagnosticArgValue, DiagnosticBuilder, ErrorGuaranteed, Handler, IntoDiagnostic, IntoDiagnosticArg,
+};
+use rustc_macros::{Diagnostic, Subdiagnostic};
+use rustc_span::Span;
 use std::borrow::Cow;
 
-struct ExitCode(Option<i32>);
-
-impl IntoDiagnosticArg for ExitCode {
-    fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> {
-        let ExitCode(exit_code) = self;
-        match exit_code {
-            Some(t) => t.into_diagnostic_arg(),
-            None => DiagnosticArgValue::Str(Cow::Borrowed("<signal>")),
-        }
-    }
-}
-
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_basic_integer, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationBasicInteger<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub ty: Ty<'a>,
-}
-
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_invalid_float_vector, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationInvalidFloatVector<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub elem_ty: &'a str,
-    pub vec_ty: Ty<'a>,
-}
-
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_not_float, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationNotFloat<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub ty: Ty<'a>,
-}
-
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_unrecognized, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationUnrecognized {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-}
+use crate::fluent_generated as fluent;
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_expected_signed_unsigned, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationExpectedSignedUnsigned<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub elem_ty: Ty<'a>,
-    pub vec_ty: Ty<'a>,
+#[diag(codegen_gcc_unknown_ctarget_feature_prefix)]
+#[note]
+pub(crate) struct UnknownCTargetFeaturePrefix<'a> {
+    pub feature: &'a str,
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_unsupported_element, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationUnsupportedElement<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub in_ty: Ty<'a>,
-    pub elem_ty: Ty<'a>,
-    pub ret_ty: Ty<'a>,
+#[diag(codegen_gcc_unknown_ctarget_feature)]
+#[note]
+pub(crate) struct UnknownCTargetFeature<'a> {
+    pub feature: &'a str,
+    #[subdiagnostic]
+    pub rust_feature: PossibleFeature<'a>,
 }
 
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_invalid_bitmask, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationInvalidBitmask<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub ty: Ty<'a>,
-    pub expected_int_bits: u64,
-    pub expected_bytes: u64,
+#[derive(Subdiagnostic)]
+pub(crate) enum PossibleFeature<'a> {
+    #[help(codegen_gcc_possible_feature)]
+    Some { rust_feature: &'a str },
+    #[help(codegen_gcc_consider_filing_feature_request)]
+    None,
 }
 
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_simd_shuffle, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationSimdShuffle<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub ty: Ty<'a>,
-}
+struct ExitCode(Option<i32>);
 
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_expected_simd, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationExpectedSimd<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub position: &'a str,
-    pub found_ty: Ty<'a>,
+impl IntoDiagnosticArg for ExitCode {
+    fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> {
+        let ExitCode(exit_code) = self;
+        match exit_code {
+            Some(t) => t.into_diagnostic_arg(),
+            None => DiagnosticArgValue::Str(Cow::Borrowed("<signal>")),
+        }
+    }
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_mask_type, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationMaskType<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub ty: Ty<'a>,
-}
+#[diag(codegen_gcc_lto_not_supported)]
+pub(crate) struct LTONotSupported;
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_return_length, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationReturnLength<'a> {
+#[diag(codegen_gcc_unwinding_inline_asm)]
+pub(crate) struct UnwindingInlineAsm {
     #[primary_span]
     pub span: Span,
-    pub name: Symbol,
-    pub in_len: u64,
-    pub ret_ty: Ty<'a>,
-    pub out_len: u64,
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_return_length_input_type, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationReturnLengthInputType<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub in_len: u64,
-    pub in_ty: Ty<'a>,
-    pub ret_ty: Ty<'a>,
-    pub out_len: u64,
+#[diag(codegen_gcc_invalid_minimum_alignment)]
+pub(crate) struct InvalidMinimumAlignment {
+    pub err: String,
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_return_element, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationReturnElement<'a> {
+#[diag(codegen_gcc_tied_target_features)]
+#[help]
+pub(crate) struct TiedTargetFeatures {
     #[primary_span]
     pub span: Span,
-    pub name: Symbol,
-    pub in_elem: Ty<'a>,
-    pub in_ty: Ty<'a>,
-    pub ret_ty: Ty<'a>,
-    pub out_ty: Ty<'a>,
+    pub features: String,
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_return_type, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationReturnType<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub in_elem: Ty<'a>,
-    pub in_ty: Ty<'a>,
-    pub ret_ty: Ty<'a>,
+#[diag(codegen_gcc_copy_bitcode)]
+pub(crate) struct CopyBitcode {
+    pub err: std::io::Error,
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_inserted_type, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationInsertedType<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub in_elem: Ty<'a>,
-    pub in_ty: Ty<'a>,
-    pub out_ty: Ty<'a>,
-}
+#[diag(codegen_gcc_dynamic_linking_with_lto)]
+#[note]
+pub(crate) struct DynamicLinkingWithLTO;
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_return_integer_type, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationReturnIntegerType<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub ret_ty: Ty<'a>,
-    pub out_ty: Ty<'a>,
+#[diag(codegen_gcc_load_bitcode)]
+pub(crate) struct LoadBitcode {
+    name: String,
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_mismatched_lengths, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationMismatchedLengths {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub m_len: u64,
-    pub v_len: u64,
-}
+#[diag(codegen_gcc_lto_disallowed)]
+pub(crate) struct LtoDisallowed;
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_unsupported_cast, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationUnsupportedCast<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub in_ty: Ty<'a>,
-    pub in_elem: Ty<'a>,
-    pub ret_ty: Ty<'a>,
-    pub out_elem: Ty<'a>,
-}
+#[diag(codegen_gcc_lto_dylib)]
+pub(crate) struct LtoDylib;
 
 #[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_monomorphization_unsupported_operation, code = "E0511")]
-pub(crate) struct InvalidMonomorphizationUnsupportedOperation<'a> {
-    #[primary_span]
-    pub span: Span,
-    pub name: Symbol,
-    pub in_ty: Ty<'a>,
-    pub in_elem: Ty<'a>,
+#[diag(codegen_gcc_lto_bitcode_from_rlib)]
+pub(crate) struct LtoBitcodeFromRlib {
+    pub gcc_err: String,
 }
 
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_lto_not_supported)]
-pub(crate) struct LTONotSupported;
-
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_unwinding_inline_asm)]
-pub(crate) struct UnwindingInlineAsm {
-    #[primary_span]
-    pub span: Span,
+pub(crate) struct TargetFeatureDisableOrEnable<'a> {
+    pub features: &'a [&'a str],
+    pub span: Option<Span>,
+    pub missing_features: Option<MissingFeatures>,
 }
 
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_invalid_minimum_alignment)]
-pub(crate) struct InvalidMinimumAlignment {
-    pub err: String,
-}
+#[derive(Subdiagnostic)]
+#[help(codegen_gcc_missing_features)]
+pub(crate) struct MissingFeatures;
 
-#[derive(Diagnostic)]
-#[diag(codegen_gcc_tied_target_features)]
-#[help]
-pub(crate) struct TiedTargetFeatures {
-    #[primary_span]
-    pub span: Span,
-    pub features: String,
+impl IntoDiagnostic<'_, ErrorGuaranteed> for TargetFeatureDisableOrEnable<'_> {
+    fn into_diagnostic(self, sess: &'_ Handler) -> DiagnosticBuilder<'_, ErrorGuaranteed> {
+        let mut diag = sess.struct_err(fluent::codegen_gcc_target_feature_disable_or_enable);
+        if let Some(span) = self.span {
+            diag.set_span(span);
+        };
+        if let Some(missing_features) = self.missing_features {
+            diag.subdiagnostic(missing_features);
+        }
+        diag.set_arg("features", self.features.join(", "));
+        diag
+    }
 }
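
The manual `IntoDiagnostic` impl above exists because the optional span and optional subdiagnostic cannot be expressed through the derive. A rough illustration of the same pattern with stand-in types (the real code uses rustc's Handler and DiagnosticBuilder, and the message text lives in a Fluent resource):

    struct Diag { msg: String, span: Option<u32>, helps: Vec<String> }

    struct TargetFeatureDisableOrEnable<'a> {
        features: &'a [&'a str],
        span: Option<u32>,
        missing_features: Option<String>,
    }

    impl TargetFeatureDisableOrEnable<'_> {
        // Attach the span and the help subdiagnostic only when present.
        fn into_diag(self) -> Diag {
            let mut diag = Diag {
                msg: format!("target features {} must be toggled together", self.features.join(", ")),
                span: None,
                helps: vec![],
            };
            if let Some(span) = self.span {
                diag.span = Some(span);
            }
            if let Some(help) = self.missing_features {
                diag.helps.push(help);
            }
            diag
        }
    }

    fn main() {
        let diag = TargetFeatureDisableOrEnable {
            features: &["paca", "pacg"],
            span: Some(42),
            missing_features: None,
        }
        .into_diag();
        assert_eq!(diag.span, Some(42));
        assert!(diag.msg.contains("paca, pacg"));
    }
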
diff --git a/compiler/rustc_codegen_gcc/src/gcc_util.rs b/compiler/rustc_codegen_gcc/src/gcc_util.rs
new file mode 100644
index 00000000000..0514c9988e0
--- /dev/null
+++ b/compiler/rustc_codegen_gcc/src/gcc_util.rs
@@ -0,0 +1,223 @@
+#[cfg(feature="master")]
+use gccjit::Context;
+use smallvec::{smallvec, SmallVec};
+
+use rustc_codegen_ssa::target_features::{
+    supported_target_features, tied_target_features, RUSTC_SPECIFIC_FEATURES,
+};
+use rustc_data_structures::fx::FxHashMap;
+use rustc_middle::bug;
+use rustc_session::Session;
+
+use crate::errors::{PossibleFeature, TargetFeatureDisableOrEnable, UnknownCTargetFeature, UnknownCTargetFeaturePrefix};
+
+/// The list of GCC features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`,
+/// `--target` and similar).
+pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<String> {
+    // Features that come earlier are overridden by conflicting features later in the string.
+    // Typically we'll want more explicit settings to override the implicit ones, so:
+    //
+    // * Features from -Ctarget-cpu=*; are overridden by [^1]
+    // * Features implied by --target; are overridden by
+    // * Features from -Ctarget-feature; are overridden by
+    // * function specific features.
+    //
+    // [^1]: target-cpu=native is handled here, other target-cpu values are handled implicitly
+    // through GCC march implementation.
+    //
+    // FIXME(nagisa): it isn't clear what the best interaction between features implied by
+    // `-Ctarget-cpu` and `--target` is. On one hand, you'd expect CLI arguments to always
+    // override anything that's implicit, so e.g. when there's no `--target` flag, features implied
+    // by the host target are overridden by `-Ctarget-cpu=*`. On the other hand, what about when both
+    // `--target` and `-Ctarget-cpu=*` are specified? Both then imply some target features and both
+    // flags are specified by the user on the CLI. It isn't as clear-cut which order of precedence
+    // should be taken in cases like these.
+    let mut features = vec![];
+
+    // Features implied by an implicit or explicit `--target`.
+    features.extend(
+        sess.target
+            .features
+            .split(',')
+            .filter(|v| !v.is_empty() && backend_feature_name(v).is_some())
+            .map(String::from),
+    );
+
+    // -Ctarget-features
+    let supported_features = supported_target_features(sess);
+    let mut featsmap = FxHashMap::default();
+    let feats = sess.opts.cg.target_feature
+        .split(',')
+        .filter_map(|s| {
+            let enable_disable = match s.chars().next() {
+                None => return None,
+                Some(c @ ('+' | '-')) => c,
+                Some(_) => {
+                    if diagnostics {
+                        sess.emit_warning(UnknownCTargetFeaturePrefix { feature: s });
+                    }
+                    return None;
+                }
+            };
+
+            let feature = backend_feature_name(s)?;
+            // Warn against use of GCC specific feature names on the CLI.
+            if diagnostics && !supported_features.iter().any(|&(v, _)| v == feature) {
+                let rust_feature = supported_features.iter().find_map(|&(rust_feature, _)| {
+                    let gcc_features = to_gcc_features(sess, rust_feature);
+                    if gcc_features.contains(&feature) && !gcc_features.contains(&rust_feature) {
+                        Some(rust_feature)
+                    } else {
+                        None
+                    }
+                });
+                let unknown_feature =
+                    if let Some(rust_feature) = rust_feature {
+                        UnknownCTargetFeature {
+                            feature,
+                            rust_feature: PossibleFeature::Some { rust_feature },
+                        }
+                    }
+                    else {
+                        UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None }
+                    };
+                sess.emit_warning(unknown_feature);
+            }
+
+            if diagnostics {
+                // FIXME(nagisa): figure out how to not allocate a full hashset here.
+                featsmap.insert(feature, enable_disable == '+');
+            }
+
+            // rustc-specific features do not get passed down to GCC…
+            if RUSTC_SPECIFIC_FEATURES.contains(&feature) {
+                return None;
+            }
+            // ... otherwise though we run through `to_gcc_features` when
+            // passing requests down to GCC. This means that all in-language
+            // features also work on the command line instead of having two
+            // different names when the GCC name and the Rust name differ.
+            Some(to_gcc_features(sess, feature)
+                .iter()
+                .flat_map(|feat| to_gcc_features(sess, feat).into_iter())
+                .map(|feature| {
+                    if enable_disable == '-' {
+                        format!("-{}", feature)
+                    }
+                    else {
+                        feature.to_string()
+                    }
+                })
+                .collect::<Vec<_>>(),
+            )
+        })
+        .flatten();
+    features.extend(feats);
+
+    if diagnostics {
+        if let Some(f) = check_tied_features(sess, &featsmap) {
+            sess.emit_err(TargetFeatureDisableOrEnable {
+                features: f,
+                span: None,
+                missing_features: None,
+            });
+        }
+    }
+
+    features
+}
+
+/// Returns a feature name for the given `+feature` or `-feature` string.
+///
+/// Only allows features that are backend specific (i.e. not [`RUSTC_SPECIFIC_FEATURES`]).
+fn backend_feature_name(s: &str) -> Option<&str> {
+    // features must start with a `+` or `-`.
+    let feature = s.strip_prefix(&['+', '-'][..]).unwrap_or_else(|| {
+        bug!("target feature `{}` must begin with a `+` or `-`", s);
+    });
+    // Rustc-specific feature requests like `+crt-static` or `-crt-static`
+    // are not passed down to GCC.
+    if RUSTC_SPECIFIC_FEATURES.contains(&feature) {
+        return None;
+    }
+    Some(feature)
+}
+
+// To find a list of GCC's names, check https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
+pub fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
+    let arch = if sess.target.arch == "x86_64" { "x86" } else { &*sess.target.arch };
+    match (arch, s) {
+        ("x86", "sse4.2") => smallvec!["sse4.2", "crc32"],
+        ("x86", "pclmulqdq") => smallvec!["pclmul"],
+        ("x86", "rdrand") => smallvec!["rdrnd"],
+        ("x86", "bmi1") => smallvec!["bmi"],
+        ("x86", "cmpxchg16b") => smallvec!["cx16"],
+        ("x86", "avx512vaes") => smallvec!["vaes"],
+        ("x86", "avx512gfni") => smallvec!["gfni"],
+        ("x86", "avx512vpclmulqdq") => smallvec!["vpclmulqdq"],
+        // NOTE: seems like GCC requires 'avx512bw' for 'avx512vbmi2'.
+        ("x86", "avx512vbmi2") => smallvec!["avx512vbmi2", "avx512bw"],
+        // NOTE: seems like GCC requires 'avx512bw' for 'avx512bitalg'.
+        ("x86", "avx512bitalg") => smallvec!["avx512bitalg", "avx512bw"],
+        ("aarch64", "rcpc2") => smallvec!["rcpc-immo"],
+        ("aarch64", "dpb") => smallvec!["ccpp"],
+        ("aarch64", "dpb2") => smallvec!["ccdp"],
+        ("aarch64", "frintts") => smallvec!["fptoint"],
+        ("aarch64", "fcma") => smallvec!["complxnum"],
+        ("aarch64", "pmuv3") => smallvec!["perfmon"],
+        ("aarch64", "paca") => smallvec!["pauth"],
+        ("aarch64", "pacg") => smallvec!["pauth"],
+        // Rust ties fp and neon together. In GCC neon implicitly enables fp,
+        // but we manually enable neon when a feature only implicitly enables fp
+        ("aarch64", "f32mm") => smallvec!["f32mm", "neon"],
+        ("aarch64", "f64mm") => smallvec!["f64mm", "neon"],
+        ("aarch64", "fhm") => smallvec!["fp16fml", "neon"],
+        ("aarch64", "fp16") => smallvec!["fullfp16", "neon"],
+        ("aarch64", "jsconv") => smallvec!["jsconv", "neon"],
+        ("aarch64", "sve") => smallvec!["sve", "neon"],
+        ("aarch64", "sve2") => smallvec!["sve2", "neon"],
+        ("aarch64", "sve2-aes") => smallvec!["sve2-aes", "neon"],
+        ("aarch64", "sve2-sm4") => smallvec!["sve2-sm4", "neon"],
+        ("aarch64", "sve2-sha3") => smallvec!["sve2-sha3", "neon"],
+        ("aarch64", "sve2-bitperm") => smallvec!["sve2-bitperm", "neon"],
+        (_, s) => smallvec![s],
+    }
+}
+
+// Given a map from target_features to whether they are enabled or disabled,
+// ensure only valid combinations are allowed.
+pub fn check_tied_features(sess: &Session, features: &FxHashMap<&str, bool>) -> Option<&'static [&'static str]> {
+    for tied in tied_target_features(sess) {
+        // Tied features must be set to the same value, or not set at all
+        let mut tied_iter = tied.iter();
+        let enabled = features.get(tied_iter.next().unwrap());
+        if tied_iter.any(|feature| enabled != features.get(feature)) {
+            return Some(tied);
+        }
+    }
+    None
+}
+
+fn handle_native(name: &str) -> &str {
+    if name != "native" {
+        return name;
+    }
+
+    #[cfg(feature="master")]
+    {
+        // Get the native arch.
+        let context = Context::default();
+        context.get_target_info().arch().unwrap()
+            .to_str()
+            .unwrap()
+    }
+    #[cfg(not(feature="master"))]
+    unimplemented!();
+}
+
+pub fn target_cpu(sess: &Session) -> &str {
+    match sess.opts.cg.target_cpu {
+        Some(ref name) => handle_native(name),
+        None => handle_native(sess.target.cpu.as_ref()),
+    }
+}
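
To illustrate `check_tied_features` above: a tied group (such as `paca`/`pacg` on aarch64) is only valid when every member has the same state, i.e. all enabled, all disabled, or all unset. A minimal standalone version of the check:

    use std::collections::HashMap;

    // A tied group is consistent when every member matches the first one's state.
    fn tied_group_consistent(tied: &[&str], features: &HashMap<&str, bool>) -> bool {
        let mut iter = tied.iter();
        let first = iter.next().and_then(|feature| features.get(feature));
        iter.all(|feature| features.get(feature) == first)
    }

    fn main() {
        let mut features = HashMap::new();
        features.insert("paca", true);
        features.insert("pacg", true);
        assert!(tied_group_consistent(&["paca", "pacg"], &features));
        features.insert("pacg", false); // mismatched states within the group
        assert!(!tied_group_consistent(&["paca", "pacg"], &features));
    }
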
diff --git a/compiler/rustc_codegen_gcc/src/int.rs b/compiler/rustc_codegen_gcc/src/int.rs
index 0cf1204791d..58e0dd56f38 100644
--- a/compiler/rustc_codegen_gcc/src/int.rs
+++ b/compiler/rustc_codegen_gcc/src/int.rs
@@ -36,7 +36,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             self.cx.context.new_unary_op(None, operation, typ, a)
         }
         else {
-            // TODO(antoyo): use __negdi2 and __negti2 instead?
             let element_type = typ.dyncast_array().expect("element type");
             let values = [
                 self.cx.context.new_unary_op(None, UnaryOp::BitwiseNegate, element_type, self.low(a)),
@@ -52,9 +51,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             self.cx.context.new_unary_op(None, UnaryOp::Minus, a.get_type(), a)
         }
         else {
-            let param_a = self.context.new_parameter(None, a_type, "a");
-            let func = self.context.new_function(None, FunctionType::Extern, a_type, &[param_a], "__negti2", false);
-            self.context.new_call(None, func, &[a])
+            self.gcc_add(self.gcc_not(a), self.gcc_int(a_type, 1))
         }
     }
 
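
The rewrite above drops the `__negti2` libcall: in two's complement, `-a == !a + 1`, so negation can be built from a bitwise NOT and an addition the backend already lowers for 128-bit integers. A quick check of the identity:

    // Two's-complement identity used above (wrapping at i128::MIN).
    fn negate(a: i128) -> i128 {
        (!a).wrapping_add(1)
    }

    fn main() {
        assert_eq!(negate(5), -5);
        assert_eq!(negate(-7), 7);
        assert_eq!(negate(i128::MIN), i128::MIN); // wraps, like hardware negation
    }
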
@@ -353,23 +350,63 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         (res.dereference(None).to_rvalue(), overflow)
     }
 
-    pub fn gcc_icmp(&self, op: IntPredicate, mut lhs: RValue<'gcc>, mut rhs: RValue<'gcc>) -> RValue<'gcc> {
+    pub fn gcc_icmp(&mut self, op: IntPredicate, mut lhs: RValue<'gcc>, mut rhs: RValue<'gcc>) -> RValue<'gcc> {
         let a_type = lhs.get_type();
         let b_type = rhs.get_type();
         if self.is_non_native_int_type(a_type) || self.is_non_native_int_type(b_type) {
-            let signed = a_type.is_compatible_with(self.i128_type);
-            let sign =
-                if signed {
-                    ""
-                }
-                else {
-                    "u"
-                };
-            let func_name = format!("__{}cmpti2", sign);
-            let param_a = self.context.new_parameter(None, a_type, "a");
-            let param_b = self.context.new_parameter(None, b_type, "b");
-            let func = self.context.new_function(None, FunctionType::Extern, self.int_type, &[param_a, param_b], func_name, false);
-            let cmp = self.context.new_call(None, func, &[lhs, rhs]);
+            // This algorithm is based on compiler-rt's __cmpti2:
+            // https://github.com/llvm-mirror/compiler-rt/blob/f0745e8476f069296a7c71accedd061dce4cdf79/lib/builtins/cmpti2.c#L21
+            let result = self.current_func().new_local(None, self.int_type, "icmp_result");
+            let block1 = self.current_func().new_block("block1");
+            let block2 = self.current_func().new_block("block2");
+            let block3 = self.current_func().new_block("block3");
+            let block4 = self.current_func().new_block("block4");
+            let block5 = self.current_func().new_block("block5");
+            let block6 = self.current_func().new_block("block6");
+            let block7 = self.current_func().new_block("block7");
+            let block8 = self.current_func().new_block("block8");
+            let after = self.current_func().new_block("after");
+
+            let native_int_type = a_type.dyncast_array().expect("get element type");
+            // NOTE: cast the low parts to their unsigned type in order to compare them
+            // correctly (the sign bit only lives in the high part).
+            let unsigned_type = native_int_type.to_unsigned(&self.cx);
+
+            let lhs_low = self.context.new_cast(None, self.low(lhs), unsigned_type);
+            let rhs_low = self.context.new_cast(None, self.low(rhs), unsigned_type);
+
+            let condition = self.context.new_comparison(None, ComparisonOp::LessThan, self.high(lhs), self.high(rhs));
+            self.llbb().end_with_conditional(None, condition, block1, block2);
+
+            block1.add_assignment(None, result, self.context.new_rvalue_zero(self.int_type));
+            block1.end_with_jump(None, after);
+
+            let condition = self.context.new_comparison(None, ComparisonOp::GreaterThan, self.high(lhs), self.high(rhs));
+            block2.end_with_conditional(None, condition, block3, block4);
+
+            block3.add_assignment(None, result, self.context.new_rvalue_from_int(self.int_type, 2));
+            block3.end_with_jump(None, after);
+
+            let condition = self.context.new_comparison(None, ComparisonOp::LessThan, lhs_low, rhs_low);
+            block4.end_with_conditional(None, condition, block5, block6);
+
+            block5.add_assignment(None, result, self.context.new_rvalue_zero(self.int_type));
+            block5.end_with_jump(None, after);
+
+            let condition = self.context.new_comparison(None, ComparisonOp::GreaterThan, lhs_low, rhs_low);
+            block6.end_with_conditional(None, condition, block7, block8);
+
+            block7.add_assignment(None, result, self.context.new_rvalue_from_int(self.int_type, 2));
+            block7.end_with_jump(None, after);
+
+            block8.add_assignment(None, result, self.context.new_rvalue_one(self.int_type));
+            block8.end_with_jump(None, after);
+
+            // NOTE: since jumps were added in a place rustc does not expect, the current block
+            // in the builder state needs to be updated.
+            self.switch_to_block(after);
+
+            let cmp = result.to_rvalue();
             let (op, limit) =
                 match op {
                     IntPredicate::IntEQ => {
@@ -546,7 +583,12 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
     }
 
     pub fn gcc_uint(&self, typ: Type<'gcc>, int: u64) -> RValue<'gcc> {
-        if self.is_native_int_type_or_bool(typ) {
+        if typ.is_u128(self) {
+            // FIXME(antoyo): libgccjit cannot create 128-bit values yet.
+            let num = self.context.new_rvalue_from_long(self.u64_type, int as i64);
+            self.gcc_int_cast(num, typ)
+        }
+        else if self.is_native_int_type_or_bool(typ) {
             self.context.new_rvalue_from_long(typ, u64::try_from(int).expect("u64::try_from") as i64)
         }
         else {
@@ -572,6 +614,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             }
         }
         else if typ.is_i128(self) {
+            // FIXME(antoyo): libgccjit cannot create 128-bit values yet.
             let num = self.context.new_rvalue_from_long(self.u64_type, num as u64 as i64);
             self.gcc_int_cast(num, typ)
         }
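For reference, the block graph built in `gcc_icmp` above computes the same three-way result as compiler-rt's `__cmpti2`: 0 for less-than, 1 for equal, 2 for greater-than, comparing the signed high halves first and the unsigned low halves second. A plain-integer sketch of that convention (illustrative only, not the builder code):

    // Illustrative sketch of __cmpti2's result convention; not the codegen API.
    fn cmp3(lhs: i128, rhs: i128) -> i32 {
        // Split each value into a signed high half and an unsigned low half:
        // the sign bit only lives in the high part, so the low parts compare unsigned.
        let (lhs_high, lhs_low) = ((lhs >> 64) as i64, lhs as u64);
        let (rhs_high, rhs_low) = ((rhs >> 64) as i64, rhs as u64);
        if lhs_high < rhs_high { return 0; }
        if lhs_high > rhs_high { return 2; }
        if lhs_low < rhs_low { return 0; }
        if lhs_low > rhs_low { return 2; }
        1 // equal
    }

    fn main() {
        assert_eq!(cmp3(-1, 0), 0);
        assert_eq!(cmp3(0, 0), 1);
        assert_eq!(cmp3(1i128 << 100, 3), 2);
    }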
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
index 8a4559355ea..e01299d32fd 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
@@ -2254,6 +2254,42 @@ match name {
     "llvm.hexagon.prefetch" => "__builtin_HEXAGON_prefetch",
     "llvm.hexagon.vmemcpy" => "__builtin_hexagon_vmemcpy",
     "llvm.hexagon.vmemset" => "__builtin_hexagon_vmemset",
+    // loongarch
+    "llvm.loongarch.asrtgt.d" => "__builtin_loongarch_asrtgt_d",
+    "llvm.loongarch.asrtle.d" => "__builtin_loongarch_asrtle_d",
+    "llvm.loongarch.break" => "__builtin_loongarch_break",
+    "llvm.loongarch.cacop.d" => "__builtin_loongarch_cacop_d",
+    "llvm.loongarch.cacop.w" => "__builtin_loongarch_cacop_w",
+    "llvm.loongarch.cpucfg" => "__builtin_loongarch_cpucfg",
+    "llvm.loongarch.crc.w.b.w" => "__builtin_loongarch_crc_w_b_w",
+    "llvm.loongarch.crc.w.d.w" => "__builtin_loongarch_crc_w_d_w",
+    "llvm.loongarch.crc.w.h.w" => "__builtin_loongarch_crc_w_h_w",
+    "llvm.loongarch.crc.w.w.w" => "__builtin_loongarch_crc_w_w_w",
+    "llvm.loongarch.crcc.w.b.w" => "__builtin_loongarch_crcc_w_b_w",
+    "llvm.loongarch.crcc.w.d.w" => "__builtin_loongarch_crcc_w_d_w",
+    "llvm.loongarch.crcc.w.h.w" => "__builtin_loongarch_crcc_w_h_w",
+    "llvm.loongarch.crcc.w.w.w" => "__builtin_loongarch_crcc_w_w_w",
+    "llvm.loongarch.csrrd.d" => "__builtin_loongarch_csrrd_d",
+    "llvm.loongarch.csrrd.w" => "__builtin_loongarch_csrrd_w",
+    "llvm.loongarch.csrwr.d" => "__builtin_loongarch_csrwr_d",
+    "llvm.loongarch.csrwr.w" => "__builtin_loongarch_csrwr_w",
+    "llvm.loongarch.csrxchg.d" => "__builtin_loongarch_csrxchg_d",
+    "llvm.loongarch.csrxchg.w" => "__builtin_loongarch_csrxchg_w",
+    "llvm.loongarch.dbar" => "__builtin_loongarch_dbar",
+    "llvm.loongarch.ibar" => "__builtin_loongarch_ibar",
+    "llvm.loongarch.iocsrrd.b" => "__builtin_loongarch_iocsrrd_b",
+    "llvm.loongarch.iocsrrd.d" => "__builtin_loongarch_iocsrrd_d",
+    "llvm.loongarch.iocsrrd.h" => "__builtin_loongarch_iocsrrd_h",
+    "llvm.loongarch.iocsrrd.w" => "__builtin_loongarch_iocsrrd_w",
+    "llvm.loongarch.iocsrwr.b" => "__builtin_loongarch_iocsrwr_b",
+    "llvm.loongarch.iocsrwr.d" => "__builtin_loongarch_iocsrwr_d",
+    "llvm.loongarch.iocsrwr.h" => "__builtin_loongarch_iocsrwr_h",
+    "llvm.loongarch.iocsrwr.w" => "__builtin_loongarch_iocsrwr_w",
+    "llvm.loongarch.lddir.d" => "__builtin_loongarch_lddir_d",
+    "llvm.loongarch.ldpte.d" => "__builtin_loongarch_ldpte_d",
+    "llvm.loongarch.movfcsr2gr" => "__builtin_loongarch_movfcsr2gr",
+    "llvm.loongarch.movgr2fcsr" => "__builtin_loongarch_movgr2fcsr",
+    "llvm.loongarch.syscall" => "__builtin_loongarch_syscall",
     // mips
     "llvm.mips.absq.s.ph" => "__builtin_mips_absq_s_ph",
     "llvm.mips.absq.s.qb" => "__builtin_mips_absq_s_qb",
@@ -2954,6 +2990,8 @@ match name {
     "llvm.nvvm.barrier0.and" => "__nvvm_bar0_and",
     "llvm.nvvm.barrier0.or" => "__nvvm_bar0_or",
     "llvm.nvvm.barrier0.popc" => "__nvvm_bar0_popc",
+    "llvm.nvvm.bf2h.rn" => "__nvvm_bf2h_rn",
+    "llvm.nvvm.bf2h.rn.ftz" => "__nvvm_bf2h_rn_ftz",
     "llvm.nvvm.bitcast.d2ll" => "__nvvm_bitcast_d2ll",
     "llvm.nvvm.bitcast.f2i" => "__nvvm_bitcast_f2i",
     "llvm.nvvm.bitcast.i2f" => "__nvvm_bitcast_i2f",
@@ -2967,10 +3005,6 @@ match name {
     "llvm.nvvm.clz.ll" => "__nvvm_clz_ll",
     "llvm.nvvm.cos.approx.f" => "__nvvm_cos_approx_f",
     "llvm.nvvm.cos.approx.ftz.f" => "__nvvm_cos_approx_ftz_f",
-    "llvm.nvvm.cp.async.ca.shared.global.16" => "__nvvm_cp_async_ca_shared_global_16",
-    "llvm.nvvm.cp.async.ca.shared.global.4" => "__nvvm_cp_async_ca_shared_global_4",
-    "llvm.nvvm.cp.async.ca.shared.global.8" => "__nvvm_cp_async_ca_shared_global_8",
-    "llvm.nvvm.cp.async.cg.shared.global.16" => "__nvvm_cp_async_cg_shared_global_16",
     "llvm.nvvm.cp.async.commit.group" => "__nvvm_cp_async_commit_group",
     "llvm.nvvm.cp.async.mbarrier.arrive" => "__nvvm_cp_async_mbarrier_arrive",
     "llvm.nvvm.cp.async.mbarrier.arrive.noinc" => "__nvvm_cp_async_mbarrier_arrive_noinc",
@@ -3020,8 +3054,6 @@ match name {
     "llvm.nvvm.div.rz.ftz.f" => "__nvvm_div_rz_ftz_f",
     "llvm.nvvm.ex2.approx.d" => "__nvvm_ex2_approx_d",
     "llvm.nvvm.ex2.approx.f" => "__nvvm_ex2_approx_f",
-    "llvm.nvvm.ex2.approx.f16" => "__nvvm_ex2_approx_f16",
-    "llvm.nvvm.ex2.approx.f16x2" => "__nvvm_ex2_approx_f16x2",
     "llvm.nvvm.ex2.approx.ftz.f" => "__nvvm_ex2_approx_ftz_f",
     "llvm.nvvm.f2bf16.rn" => "__nvvm_f2bf16_rn",
     "llvm.nvvm.f2bf16.rn.relu" => "__nvvm_f2bf16_rn_relu",
@@ -3083,21 +3115,17 @@ match name {
     "llvm.nvvm.fma.rn.bf16x2" => "__nvvm_fma_rn_bf16x2",
     "llvm.nvvm.fma.rn.d" => "__nvvm_fma_rn_d",
     "llvm.nvvm.fma.rn.f" => "__nvvm_fma_rn_f",
-    "llvm.nvvm.fma.rn.f16" => "__nvvm_fma_rn_f16",
-    "llvm.nvvm.fma.rn.f16x2" => "__nvvm_fma_rn_f16x2",
+    "llvm.nvvm.fma.rn.ftz.bf16" => "__nvvm_fma_rn_ftz_bf16",
+    "llvm.nvvm.fma.rn.ftz.bf16x2" => "__nvvm_fma_rn_ftz_bf16x2",
     "llvm.nvvm.fma.rn.ftz.f" => "__nvvm_fma_rn_ftz_f",
-    "llvm.nvvm.fma.rn.ftz.f16" => "__nvvm_fma_rn_ftz_f16",
-    "llvm.nvvm.fma.rn.ftz.f16x2" => "__nvvm_fma_rn_ftz_f16x2",
-    "llvm.nvvm.fma.rn.ftz.relu.f16" => "__nvvm_fma_rn_ftz_relu_f16",
-    "llvm.nvvm.fma.rn.ftz.relu.f16x2" => "__nvvm_fma_rn_ftz_relu_f16x2",
-    "llvm.nvvm.fma.rn.ftz.sat.f16" => "__nvvm_fma_rn_ftz_sat_f16",
-    "llvm.nvvm.fma.rn.ftz.sat.f16x2" => "__nvvm_fma_rn_ftz_sat_f16x2",
+    "llvm.nvvm.fma.rn.ftz.relu.bf16" => "__nvvm_fma_rn_ftz_relu_bf16",
+    "llvm.nvvm.fma.rn.ftz.relu.bf16x2" => "__nvvm_fma_rn_ftz_relu_bf16x2",
+    "llvm.nvvm.fma.rn.ftz.sat.bf16" => "__nvvm_fma_rn_ftz_sat_bf16",
+    "llvm.nvvm.fma.rn.ftz.sat.bf16x2" => "__nvvm_fma_rn_ftz_sat_bf16x2",
     "llvm.nvvm.fma.rn.relu.bf16" => "__nvvm_fma_rn_relu_bf16",
     "llvm.nvvm.fma.rn.relu.bf16x2" => "__nvvm_fma_rn_relu_bf16x2",
-    "llvm.nvvm.fma.rn.relu.f16" => "__nvvm_fma_rn_relu_f16",
-    "llvm.nvvm.fma.rn.relu.f16x2" => "__nvvm_fma_rn_relu_f16x2",
-    "llvm.nvvm.fma.rn.sat.f16" => "__nvvm_fma_rn_sat_f16",
-    "llvm.nvvm.fma.rn.sat.f16x2" => "__nvvm_fma_rn_sat_f16x2",
+    "llvm.nvvm.fma.rn.sat.bf16" => "__nvvm_fma_rn_sat_bf16",
+    "llvm.nvvm.fma.rn.sat.bf16x2" => "__nvvm_fma_rn_sat_bf16x2",
     "llvm.nvvm.fma.rp.d" => "__nvvm_fma_rp_d",
     "llvm.nvvm.fma.rp.f" => "__nvvm_fma_rp_f",
     "llvm.nvvm.fma.rp.ftz.f" => "__nvvm_fma_rp_ftz_f",
@@ -3108,68 +3136,52 @@ match name {
     "llvm.nvvm.fmax.bf16x2" => "__nvvm_fmax_bf16x2",
     "llvm.nvvm.fmax.d" => "__nvvm_fmax_d",
     "llvm.nvvm.fmax.f" => "__nvvm_fmax_f",
-    "llvm.nvvm.fmax.f16" => "__nvvm_fmax_f16",
-    "llvm.nvvm.fmax.f16x2" => "__nvvm_fmax_f16x2",
+    "llvm.nvvm.fmax.ftz.bf16" => "__nvvm_fmax_ftz_bf16",
+    "llvm.nvvm.fmax.ftz.bf16x2" => "__nvvm_fmax_ftz_bf16x2",
     "llvm.nvvm.fmax.ftz.f" => "__nvvm_fmax_ftz_f",
-    "llvm.nvvm.fmax.ftz.f16" => "__nvvm_fmax_ftz_f16",
-    "llvm.nvvm.fmax.ftz.f16x2" => "__nvvm_fmax_ftz_f16x2",
+    "llvm.nvvm.fmax.ftz.nan.bf16" => "__nvvm_fmax_ftz_nan_bf16",
+    "llvm.nvvm.fmax.ftz.nan.bf16x2" => "__nvvm_fmax_ftz_nan_bf16x2",
     "llvm.nvvm.fmax.ftz.nan.f" => "__nvvm_fmax_ftz_nan_f",
-    "llvm.nvvm.fmax.ftz.nan.f16" => "__nvvm_fmax_ftz_nan_f16",
-    "llvm.nvvm.fmax.ftz.nan.f16x2" => "__nvvm_fmax_ftz_nan_f16x2",
+    "llvm.nvvm.fmax.ftz.nan.xorsign.abs.bf16" => "__nvvm_fmax_ftz_nan_xorsign_abs_bf16",
+    "llvm.nvvm.fmax.ftz.nan.xorsign.abs.bf16x2" => "__nvvm_fmax_ftz_nan_xorsign_abs_bf16x2",
     "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f" => "__nvvm_fmax_ftz_nan_xorsign_abs_f",
-    "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16" => "__nvvm_fmax_ftz_nan_xorsign_abs_f16",
-    "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16x2" => "__nvvm_fmax_ftz_nan_xorsign_abs_f16x2",
+    "llvm.nvvm.fmax.ftz.xorsign.abs.bf16" => "__nvvm_fmax_ftz_xorsign_abs_bf16",
+    "llvm.nvvm.fmax.ftz.xorsign.abs.bf16x2" => "__nvvm_fmax_ftz_xorsign_abs_bf16x2",
     "llvm.nvvm.fmax.ftz.xorsign.abs.f" => "__nvvm_fmax_ftz_xorsign_abs_f",
-    "llvm.nvvm.fmax.ftz.xorsign.abs.f16" => "__nvvm_fmax_ftz_xorsign_abs_f16",
-    "llvm.nvvm.fmax.ftz.xorsign.abs.f16x2" => "__nvvm_fmax_ftz_xorsign_abs_f16x2",
     "llvm.nvvm.fmax.nan.bf16" => "__nvvm_fmax_nan_bf16",
     "llvm.nvvm.fmax.nan.bf16x2" => "__nvvm_fmax_nan_bf16x2",
     "llvm.nvvm.fmax.nan.f" => "__nvvm_fmax_nan_f",
-    "llvm.nvvm.fmax.nan.f16" => "__nvvm_fmax_nan_f16",
-    "llvm.nvvm.fmax.nan.f16x2" => "__nvvm_fmax_nan_f16x2",
     "llvm.nvvm.fmax.nan.xorsign.abs.bf16" => "__nvvm_fmax_nan_xorsign_abs_bf16",
     "llvm.nvvm.fmax.nan.xorsign.abs.bf16x2" => "__nvvm_fmax_nan_xorsign_abs_bf16x2",
     "llvm.nvvm.fmax.nan.xorsign.abs.f" => "__nvvm_fmax_nan_xorsign_abs_f",
-    "llvm.nvvm.fmax.nan.xorsign.abs.f16" => "__nvvm_fmax_nan_xorsign_abs_f16",
-    "llvm.nvvm.fmax.nan.xorsign.abs.f16x2" => "__nvvm_fmax_nan_xorsign_abs_f16x2",
     "llvm.nvvm.fmax.xorsign.abs.bf16" => "__nvvm_fmax_xorsign_abs_bf16",
     "llvm.nvvm.fmax.xorsign.abs.bf16x2" => "__nvvm_fmax_xorsign_abs_bf16x2",
     "llvm.nvvm.fmax.xorsign.abs.f" => "__nvvm_fmax_xorsign_abs_f",
-    "llvm.nvvm.fmax.xorsign.abs.f16" => "__nvvm_fmax_xorsign_abs_f16",
-    "llvm.nvvm.fmax.xorsign.abs.f16x2" => "__nvvm_fmax_xorsign_abs_f16x2",
     "llvm.nvvm.fmin.bf16" => "__nvvm_fmin_bf16",
     "llvm.nvvm.fmin.bf16x2" => "__nvvm_fmin_bf16x2",
     "llvm.nvvm.fmin.d" => "__nvvm_fmin_d",
     "llvm.nvvm.fmin.f" => "__nvvm_fmin_f",
-    "llvm.nvvm.fmin.f16" => "__nvvm_fmin_f16",
-    "llvm.nvvm.fmin.f16x2" => "__nvvm_fmin_f16x2",
+    "llvm.nvvm.fmin.ftz.bf16" => "__nvvm_fmin_ftz_bf16",
+    "llvm.nvvm.fmin.ftz.bf16x2" => "__nvvm_fmin_ftz_bf16x2",
     "llvm.nvvm.fmin.ftz.f" => "__nvvm_fmin_ftz_f",
-    "llvm.nvvm.fmin.ftz.f16" => "__nvvm_fmin_ftz_f16",
-    "llvm.nvvm.fmin.ftz.f16x2" => "__nvvm_fmin_ftz_f16x2",
+    "llvm.nvvm.fmin.ftz.nan.bf16" => "__nvvm_fmin_ftz_nan_bf16",
+    "llvm.nvvm.fmin.ftz.nan.bf16x2" => "__nvvm_fmin_ftz_nan_bf16x2",
     "llvm.nvvm.fmin.ftz.nan.f" => "__nvvm_fmin_ftz_nan_f",
-    "llvm.nvvm.fmin.ftz.nan.f16" => "__nvvm_fmin_ftz_nan_f16",
-    "llvm.nvvm.fmin.ftz.nan.f16x2" => "__nvvm_fmin_ftz_nan_f16x2",
+    "llvm.nvvm.fmin.ftz.nan.xorsign.abs.bf16" => "__nvvm_fmin_ftz_nan_xorsign_abs_bf16",
+    "llvm.nvvm.fmin.ftz.nan.xorsign.abs.bf16x2" => "__nvvm_fmin_ftz_nan_xorsign_abs_bf16x2",
     "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f" => "__nvvm_fmin_ftz_nan_xorsign_abs_f",
-    "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16" => "__nvvm_fmin_ftz_nan_xorsign_abs_f16",
-    "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16x2" => "__nvvm_fmin_ftz_nan_xorsign_abs_f16x2",
+    "llvm.nvvm.fmin.ftz.xorsign.abs.bf16" => "__nvvm_fmin_ftz_xorsign_abs_bf16",
+    "llvm.nvvm.fmin.ftz.xorsign.abs.bf16x2" => "__nvvm_fmin_ftz_xorsign_abs_bf16x2",
     "llvm.nvvm.fmin.ftz.xorsign.abs.f" => "__nvvm_fmin_ftz_xorsign_abs_f",
-    "llvm.nvvm.fmin.ftz.xorsign.abs.f16" => "__nvvm_fmin_ftz_xorsign_abs_f16",
-    "llvm.nvvm.fmin.ftz.xorsign.abs.f16x2" => "__nvvm_fmin_ftz_xorsign_abs_f16x2",
     "llvm.nvvm.fmin.nan.bf16" => "__nvvm_fmin_nan_bf16",
     "llvm.nvvm.fmin.nan.bf16x2" => "__nvvm_fmin_nan_bf16x2",
     "llvm.nvvm.fmin.nan.f" => "__nvvm_fmin_nan_f",
-    "llvm.nvvm.fmin.nan.f16" => "__nvvm_fmin_nan_f16",
-    "llvm.nvvm.fmin.nan.f16x2" => "__nvvm_fmin_nan_f16x2",
     "llvm.nvvm.fmin.nan.xorsign.abs.bf16" => "__nvvm_fmin_nan_xorsign_abs_bf16",
     "llvm.nvvm.fmin.nan.xorsign.abs.bf16x2" => "__nvvm_fmin_nan_xorsign_abs_bf16x2",
     "llvm.nvvm.fmin.nan.xorsign.abs.f" => "__nvvm_fmin_nan_xorsign_abs_f",
-    "llvm.nvvm.fmin.nan.xorsign.abs.f16" => "__nvvm_fmin_nan_xorsign_abs_f16",
-    "llvm.nvvm.fmin.nan.xorsign.abs.f16x2" => "__nvvm_fmin_nan_xorsign_abs_f16x2",
     "llvm.nvvm.fmin.xorsign.abs.bf16" => "__nvvm_fmin_xorsign_abs_bf16",
     "llvm.nvvm.fmin.xorsign.abs.bf16x2" => "__nvvm_fmin_xorsign_abs_bf16x2",
     "llvm.nvvm.fmin.xorsign.abs.f" => "__nvvm_fmin_xorsign_abs_f",
-    "llvm.nvvm.fmin.xorsign.abs.f16" => "__nvvm_fmin_xorsign_abs_f16",
-    "llvm.nvvm.fmin.xorsign.abs.f16x2" => "__nvvm_fmin_xorsign_abs_f16x2",
     "llvm.nvvm.fns" => "__nvvm_fns",
     "llvm.nvvm.h2f" => "__nvvm_h2f",
     "llvm.nvvm.i2d.rm" => "__nvvm_i2d_rm",
@@ -4255,6 +4267,28 @@ match name {
     "llvm.r600.read.tgid.x" => "__builtin_r600_read_tgid_x",
     "llvm.r600.read.tgid.y" => "__builtin_r600_read_tgid_y",
     "llvm.r600.read.tgid.z" => "__builtin_r600_read_tgid_z",
+    // riscv
+    "llvm.riscv.aes32dsi" => "__builtin_riscv_aes32dsi",
+    "llvm.riscv.aes32dsmi" => "__builtin_riscv_aes32dsmi",
+    "llvm.riscv.aes32esi" => "__builtin_riscv_aes32esi",
+    "llvm.riscv.aes32esmi" => "__builtin_riscv_aes32esmi",
+    "llvm.riscv.aes64ds" => "__builtin_riscv_aes64ds",
+    "llvm.riscv.aes64dsm" => "__builtin_riscv_aes64dsm",
+    "llvm.riscv.aes64es" => "__builtin_riscv_aes64es",
+    "llvm.riscv.aes64esm" => "__builtin_riscv_aes64esm",
+    "llvm.riscv.aes64im" => "__builtin_riscv_aes64im",
+    "llvm.riscv.aes64ks1i" => "__builtin_riscv_aes64ks1i",
+    "llvm.riscv.aes64ks2" => "__builtin_riscv_aes64ks2",
+    "llvm.riscv.sha512sig0" => "__builtin_riscv_sha512sig0",
+    "llvm.riscv.sha512sig0h" => "__builtin_riscv_sha512sig0h",
+    "llvm.riscv.sha512sig0l" => "__builtin_riscv_sha512sig0l",
+    "llvm.riscv.sha512sig1" => "__builtin_riscv_sha512sig1",
+    "llvm.riscv.sha512sig1h" => "__builtin_riscv_sha512sig1h",
+    "llvm.riscv.sha512sig1l" => "__builtin_riscv_sha512sig1l",
+    "llvm.riscv.sha512sum0" => "__builtin_riscv_sha512sum0",
+    "llvm.riscv.sha512sum0r" => "__builtin_riscv_sha512sum0r",
+    "llvm.riscv.sha512sum1" => "__builtin_riscv_sha512sum1",
+    "llvm.riscv.sha512sum1r" => "__builtin_riscv_sha512sum1r",
     // s390
     "llvm.s390.efpc" => "__builtin_s390_efpc",
     "llvm.s390.etnd" => "__builtin_tx_nesting_depth",
@@ -5954,6 +5988,18 @@ match name {
     "llvm.x86.avx2.vpdpbuud.256" => "__builtin_ia32_vpdpbuud256",
     "llvm.x86.avx2.vpdpbuuds.128" => "__builtin_ia32_vpdpbuuds128",
     "llvm.x86.avx2.vpdpbuuds.256" => "__builtin_ia32_vpdpbuuds256",
+    "llvm.x86.avx2.vpdpwsud.128" => "__builtin_ia32_vpdpwsud128",
+    "llvm.x86.avx2.vpdpwsud.256" => "__builtin_ia32_vpdpwsud256",
+    "llvm.x86.avx2.vpdpwsuds.128" => "__builtin_ia32_vpdpwsuds128",
+    "llvm.x86.avx2.vpdpwsuds.256" => "__builtin_ia32_vpdpwsuds256",
+    "llvm.x86.avx2.vpdpwusd.128" => "__builtin_ia32_vpdpwusd128",
+    "llvm.x86.avx2.vpdpwusd.256" => "__builtin_ia32_vpdpwusd256",
+    "llvm.x86.avx2.vpdpwusds.128" => "__builtin_ia32_vpdpwusds128",
+    "llvm.x86.avx2.vpdpwusds.256" => "__builtin_ia32_vpdpwusds256",
+    "llvm.x86.avx2.vpdpwuud.128" => "__builtin_ia32_vpdpwuud128",
+    "llvm.x86.avx2.vpdpwuud.256" => "__builtin_ia32_vpdpwuud256",
+    "llvm.x86.avx2.vpdpwuuds.128" => "__builtin_ia32_vpdpwuuds128",
+    "llvm.x86.avx2.vpdpwuuds.256" => "__builtin_ia32_vpdpwuuds256",
     "llvm.x86.avx2.vperm2i128" => "__builtin_ia32_permti256",
     "llvm.x86.avx512.add.pd.512" => "__builtin_ia32_addpd512",
     "llvm.x86.avx512.add.ps.512" => "__builtin_ia32_addps512",
@@ -7895,6 +7941,10 @@ match name {
     "llvm.x86.subborrow.u64" => "__builtin_ia32_subborrow_u64",
     "llvm.x86.tbm.bextri.u32" => "__builtin_ia32_bextri_u32",
     "llvm.x86.tbm.bextri.u64" => "__builtin_ia32_bextri_u64",
+    "llvm.x86.tcmmimfp16ps" => "__builtin_ia32_tcmmimfp16ps",
+    "llvm.x86.tcmmimfp16ps.internal" => "__builtin_ia32_tcmmimfp16ps_internal",
+    "llvm.x86.tcmmrlfp16ps" => "__builtin_ia32_tcmmrlfp16ps",
+    "llvm.x86.tcmmrlfp16ps.internal" => "__builtin_ia32_tcmmrlfp16ps_internal",
     "llvm.x86.tdpbf16ps" => "__builtin_ia32_tdpbf16ps",
     "llvm.x86.tdpbf16ps.internal" => "__builtin_ia32_tdpbf16ps_internal",
     "llvm.x86.tdpbssd" => "__builtin_ia32_tdpbssd",
@@ -7947,6 +7997,16 @@ match name {
     "llvm.x86.vgf2p8mulb.128" => "__builtin_ia32_vgf2p8mulb_v16qi",
     "llvm.x86.vgf2p8mulb.256" => "__builtin_ia32_vgf2p8mulb_v32qi",
     "llvm.x86.vgf2p8mulb.512" => "__builtin_ia32_vgf2p8mulb_v64qi",
+    "llvm.x86.vsha512msg1" => "__builtin_ia32_vsha512msg1",
+    "llvm.x86.vsha512msg2" => "__builtin_ia32_vsha512msg2",
+    "llvm.x86.vsha512rnds2" => "__builtin_ia32_vsha512rnds2",
+    "llvm.x86.vsm3msg1" => "__builtin_ia32_vsm3msg1",
+    "llvm.x86.vsm3msg2" => "__builtin_ia32_vsm3msg2",
+    "llvm.x86.vsm3rnds2" => "__builtin_ia32_vsm3rnds2",
+    "llvm.x86.vsm4key4128" => "__builtin_ia32_vsm4key4128",
+    "llvm.x86.vsm4key4256" => "__builtin_ia32_vsm4key4256",
+    "llvm.x86.vsm4rnds4128" => "__builtin_ia32_vsm4rnds4128",
+    "llvm.x86.vsm4rnds4256" => "__builtin_ia32_vsm4rnds4256",
     "llvm.x86.wbinvd" => "__builtin_ia32_wbinvd",
     "llvm.x86.wbnoinvd" => "__builtin_ia32_wbnoinvd",
     "llvm.x86.wrfsbase.32" => "__builtin_ia32_wrfsbase32",
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
index 0edec566be3..5996623bdc5 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
@@ -236,11 +236,17 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
                 let arg2 = builder.context.new_cast(None, arg2, arg2_type);
                 args = vec![new_args[0], arg2].into();
             },
+            // This builtin is passed one more argument than it needs.
             "__builtin_prefetch" => {
                 let mut new_args = args.to_vec();
                 new_args.pop();
                 args = new_args.into();
             },
+            // The GCC builtin returns one of the tuple's values through a pointer argument.
+            "__builtin_ia32_rdrand64_step" => {
+                let arg = builder.current_func().new_local(None, builder.ulonglong_type, "return_rdrand_arg");
+                args = vec![arg.get_address(None)].into();
+            },
             _ => (),
         }
     }
@@ -313,6 +319,13 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
                 let new_args = args.to_vec();
                 args = vec![new_args[1], new_args[0], new_args[2], new_args[3], new_args[4]].into();
             },
+            "__builtin_ia32_vpshrdv_v8di" | "__builtin_ia32_vpshrdv_v4di" | "__builtin_ia32_vpshrdv_v2di" |
+                "__builtin_ia32_vpshrdv_v16si" | "__builtin_ia32_vpshrdv_v8si" | "__builtin_ia32_vpshrdv_v4si" |
+                "__builtin_ia32_vpshrdv_v32hi" | "__builtin_ia32_vpshrdv_v16hi" | "__builtin_ia32_vpshrdv_v8hi" => {
+                // The first two arguments are reversed, compared to LLVM.
+                let new_args = args.to_vec();
+                args = vec![new_args[1], new_args[0], new_args[2]].into();
+            },
             _ => (),
         }
     }
@@ -354,6 +367,19 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc,
             // builtin twice, we overwrite the return value with a dummy value.
             return_value = builder.context.new_rvalue_zero(builder.int_type);
         },
+        "__builtin_ia32_rdrand64_step" => {
+            let random_number = args[0].dereference(None).to_rvalue();
+            let success_variable = builder.current_func().new_local(None, return_value.get_type(), "success");
+            builder.llbb().add_assignment(None, success_variable, return_value);
+
+            let field1 = builder.context.new_field(None, random_number.get_type(), "random_number");
+            let field2 = builder.context.new_field(None, return_value.get_type(), "success");
+            let struct_type = builder.context.new_struct_type(None, "rdrand_result", &[field1, field2]);
+            return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[
+                random_number,
+                success_variable.to_rvalue(),
+            ]);
+        },
         _ => (),
     }
 
@@ -606,6 +632,7 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
         "llvm.fshr.v8i16" => "__builtin_ia32_vpshrdv_v8hi",
         "llvm.x86.fma.vfmadd.sd" => "__builtin_ia32_vfmaddsd3",
         "llvm.x86.fma.vfmadd.ss" => "__builtin_ia32_vfmaddss3",
+        "llvm.x86.rdrand.64" => "__builtin_ia32_rdrand64_step",
 
         // The above doc points to unknown builtins for the following, so override them:
         "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index 94dc8c9e93b..9caed459a29 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -10,9 +10,10 @@ use rustc_codegen_ssa::base::wants_msvc_seh;
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::PlaceRef;
-use rustc_codegen_ssa::traits::{ArgAbiMethods, BaseTypeMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
+use rustc_codegen_ssa::traits::{ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
 #[cfg(feature="master")]
-use rustc_codegen_ssa::traits::{DerivedTypeMethods, MiscMethods};
+use rustc_codegen_ssa::traits::{BaseTypeMethods, MiscMethods};
+use rustc_codegen_ssa::errors::InvalidMonomorphization;
 use rustc_middle::bug;
 use rustc_middle::ty::{self, Instance, Ty};
 use rustc_middle::ty::layout::LayoutOf;
@@ -31,7 +32,6 @@ use crate::abi::FnAbiGccExt;
 use crate::builder::Builder;
 use crate::common::{SignType, TypeReflection};
 use crate::context::CodegenCx;
-use crate::errors::InvalidMonomorphizationBasicInteger;
 use crate::type_of::LayoutGccExt;
 use crate::intrinsic::simd::generic_simd_intrinsic;
 
@@ -92,8 +92,8 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
         let tcx = self.tcx;
         let callee_ty = instance.ty(tcx, ty::ParamEnv::reveal_all());
 
-        let (def_id, substs) = match *callee_ty.kind() {
-            ty::FnDef(def_id, substs) => (def_id, substs),
+        let (def_id, fn_args) = match *callee_ty.kind() {
+            ty::FnDef(def_id, fn_args) => (def_id, fn_args),
             _ => bug!("expected fn item type, found {}", callee_ty),
         };
 
@@ -113,7 +113,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                 _ if simple.is_some() => {
                     // FIXME(antoyo): remove this cast when the API supports function.
                     let func = unsafe { std::mem::transmute(simple.expect("simple")) };
-                    self.call(self.type_void(), None, func, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None)
+                    self.call(self.type_void(), None, None, func, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None)
                 },
                 sym::likely => {
                     self.expect(args[0].immediate(), true)
@@ -142,9 +142,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                 }
 
                 sym::volatile_load | sym::unaligned_volatile_load => {
-                    let tp_ty = substs.type_at(0);
+                    let tp_ty = fn_args.type_at(0);
                     let mut ptr = args[0].immediate();
-                    if let PassMode::Cast(ty, _) = &fn_abi.ret.mode {
+                    if let PassMode::Cast { cast: ty, .. } = &fn_abi.ret.mode {
                         ptr = self.pointercast(ptr, self.type_ptr_to(ty.gcc_type(self)));
                     }
                     let load = self.volatile_load(ptr.get_type(), ptr);
@@ -256,7 +256,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                                 _ => bug!(),
                             },
                             None => {
-                                tcx.sess.emit_err(InvalidMonomorphizationBasicInteger { span, name, ty });
+                                tcx.sess.emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty });
                                 return;
                             }
                         }
@@ -264,7 +264,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
 
                 sym::raw_eq => {
                     use rustc_target::abi::Abi::*;
-                    let tp_ty = substs.type_at(0);
+                    let tp_ty = fn_args.type_at(0);
                     let layout = self.layout_of(tp_ty).layout;
                     let _use_integer_compare = match layout.abi() {
                         Scalar(_) | ScalarPair(_, _) => true,
@@ -302,6 +302,21 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                     }
                 }
 
+                sym::compare_bytes => {
+                    let a = args[0].immediate();
+                    let b = args[1].immediate();
+                    let n = args[2].immediate();
+
+                    let void_ptr_type = self.context.new_type::<*const ()>();
+                    let a_ptr = self.bitcast(a, void_ptr_type);
+                    let b_ptr = self.bitcast(b, void_ptr_type);
+
+                    // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
+                    let builtin = self.context.get_builtin_function("memcmp");
+                    let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
+                    self.sext(cmp, self.type_ix(32))
+                }
+
                 sym::black_box => {
                     args[0].val.store(self, result);
 
@@ -326,7 +341,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                     let masked = self.and(addr, mask);
                     self.bitcast(masked, void_ptr_type)
                 },
-                
+
                 _ if name_str.starts_with("simd_") => {
                     match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
                         Ok(llval) => llval,
@@ -338,7 +353,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
             };
 
         if !fn_abi.ret.is_ignore() {
-            if let PassMode::Cast(ty, _) = &fn_abi.ret.mode {
+            if let PassMode::Cast { cast: ty, .. } = &fn_abi.ret.mode {
                 let ptr_llty = self.type_ptr_to(ty.gcc_type(self));
                 let ptr = self.pointercast(result.llval, ptr_llty);
                 self.store(llval, ptr, result.align);
@@ -354,7 +369,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
     fn abort(&mut self) {
         let func = self.context.get_builtin_function("abort");
         let func: RValue<'gcc> = unsafe { std::mem::transmute(func) };
-        self.call(self.type_void(), None, func, &[], None);
+        self.call(self.type_void(), None, None, func, &[], None);
     }
 
     fn assume(&mut self, value: Self::Value) {
@@ -434,7 +449,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
         else if self.is_unsized_indirect() {
             bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
         }
-        else if let PassMode::Cast(ref cast, _) = self.mode {
+        else if let PassMode::Cast { ref cast, .. } = self.mode {
             // FIXME(eddyb): Figure out when the simpler Store is safe, clang
             // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
             let can_store_through_cast_ptr = false;
@@ -496,10 +511,10 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
             PassMode::Pair(..) => {
                 OperandValue::Pair(next(), next()).store(bx, dst);
             },
-            PassMode::Indirect { extra_attrs: Some(_), .. } => {
+            PassMode::Indirect { meta_attrs: Some(_), .. } => {
                 OperandValue::Ref(next(), Some(next()), self.layout.align.abi).store(bx, dst);
             },
-            PassMode::Direct(_) | PassMode::Indirect { extra_attrs: None, .. } | PassMode::Cast(..) => {
+            PassMode::Direct(_) | PassMode::Indirect { meta_attrs: None, .. } | PassMode::Cast { .. } => {
                 let next_arg = next();
                 self.store(bx, next_arg, dst);
             },
@@ -551,141 +566,52 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let context = &self.cx.context;
         let result =
             match width {
-                8 => {
-                    // First step.
-                    let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
-                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
-                    let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
-                    let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
-                    let step1 = self.or(left, right);
-
-                    // Second step.
-                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
-                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
-                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
-                    let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
-                    let step2 = self.or(left, right);
-
-                    // Third step.
-                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
-                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
-                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
-                    let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
-                    let step3 = self.or(left, right);
-
-                    step3
-                },
-                16 => {
-                    // First step.
-                    let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
-                    let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
-                    let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
-                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
-                    let step1 = self.or(left, right);
-
-                    // Second step.
-                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
-                    let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
-                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
-                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
-                    let step2 = self.or(left, right);
-
-                    // Third step.
-                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
-                    let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
-                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
-                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
-                    let step3 = self.or(left, right);
-
-                    // Fourth step.
-                    let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
-                    let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
-                    let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
-                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
-                    let step4 = self.or(left, right);
+                8 | 16 | 32 | 64 => {
+                    let mask = ((1u128 << width) - 1) as u64;
+                    let (m0, m1, m2) = if width > 16 {
+                        (
+                            context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64),
+                            context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64),
+                            context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64),
+                        )
+                    } else {
+                        (
+                            context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32),
+                            context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32),
+                            context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32),
+                        )
+                    };
+                    let one = context.new_rvalue_from_int(typ, 1);
+                    let two = context.new_rvalue_from_int(typ, 2);
+                    let four = context.new_rvalue_from_int(typ, 4);
 
-                    step4
-                },
-                32 => {
-                    // TODO(antoyo): Refactor with other implementations.
                     // First step.
-                    let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
-                    let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
+                    let left = self.lshr(value, one);
+                    let left = self.and(left, m0);
+                    let right = self.and(value, m0);
+                    let right = self.shl(right, one);
                     let step1 = self.or(left, right);
 
                     // Second step.
-                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
-                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
+                    let left = self.lshr(step1, two);
+                    let left = self.and(left, m1);
+                    let right = self.and(step1, m1);
+                    let right = self.shl(right, two);
                     let step2 = self.or(left, right);
 
                     // Third step.
-                    let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
-                    let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
+                    let left = self.lshr(step2, four);
+                    let left = self.and(left, m2);
+                    let right = self.and(step2, m2);
+                    let right = self.shl(right, four);
                     let step3 = self.or(left, right);
 
                     // Fourth step.
-                    let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
-                    let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
-                    let step4 = self.or(left, right);
-
-                    // Fifth step.
-                    let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
-                    let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
-                    let step5 = self.or(left, right);
-
-                    step5
-                },
-                64 => {
-                    // First step.
-                    let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
-                    let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
-                    let step1 = self.or(left, right);
-
-                    // Second step.
-                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
-                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
-                    let step2 = self.or(left, right);
-
-                    // Third step.
-                    let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
-                    let left = self.xor(step2, left);
-                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));
-
-                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
-                    let left = self.or(temp, left);
-                    let step3 = self.xor(left, step2);
-
-                    // Fourth step.
-                    let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
-                    let left = self.xor(step3, left);
-                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));
-
-                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
-                    let left = self.or(temp, left);
-                    let step4 = self.xor(left, step3);
-
-                    // Fifth step.
-                    let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
-                    let left = self.xor(step4, left);
-                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));
-
-                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
-                    let left = self.or(temp, left);
-                    let step5 = self.xor(left, step4);
-
-                    step5
+                    if width == 8 {
+                        step3
+                    } else {
+                        self.gcc_bswap(step3, width)
+                    }
                 },
                 128 => {
                     // TODO(antoyo): find a more efficient implementation?
@@ -1135,7 +1061,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
 fn try_intrinsic<'a, 'b, 'gcc, 'tcx>(bx: &'b mut Builder<'a, 'gcc, 'tcx>, try_func: RValue<'gcc>, data: RValue<'gcc>, _catch_func: RValue<'gcc>, dest: RValue<'gcc>) {
     if bx.sess().panic_strategy() == PanicStrategy::Abort {
-        bx.call(bx.type_void(), None, try_func, &[data], None);
+        bx.call(bx.type_void(), None, None, try_func, &[data], None);
         // Return 0 unconditionally from the intrinsic call;
         // we can never unwind.
         let ret_align = bx.tcx.data_layout.i32_align.abi;
@@ -1204,21 +1130,21 @@ fn codegen_gnu_try<'gcc>(bx: &mut Builder<'_, 'gcc, '_>, try_func: RValue<'gcc>,
         let zero = bx.cx.context.new_rvalue_zero(bx.int_type);
         let ptr = bx.cx.context.new_call(None, eh_pointer_builtin, &[zero]);
         let catch_ty = bx.type_func(&[bx.type_i8p(), bx.type_i8p()], bx.type_void());
-        bx.call(catch_ty, None, catch_func, &[data, ptr], None);
+        bx.call(catch_ty, None, None, catch_func, &[data, ptr], None);
         bx.ret(bx.const_i32(1));
 
         // NOTE: the blocks must be filled before adding the try/catch, otherwise gcc will not
         // generate a try/catch.
         // FIXME(antoyo): add a check in the libgccjit API to prevent this.
         bx.switch_to_block(current_block);
-        bx.invoke(try_func_ty, None, try_func, &[data], then, catch, None);
+        bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None);
     });
 
     let func = unsafe { std::mem::transmute(func) };
 
     // Note that no invoke is used here because by definition this function
     // can't panic (that's what it's catching).
-    let ret = bx.call(llty, None, func, &[try_func, data, catch_func], None);
+    let ret = bx.call(llty, None, None, func, &[try_func, data, catch_func], None);
     let i32_align = bx.tcx().data_layout.i32_align.abi;
     bx.store(ret, dest, i32_align);
 }
@@ -1236,19 +1162,19 @@ fn get_rust_try_fn<'a, 'gcc, 'tcx>(cx: &'a CodegenCx<'gcc, 'tcx>, codegen: &mut
 
     // Define the type up front for the signature of the rust_try function.
     let tcx = cx.tcx;
-    let i8p = tcx.mk_mut_ptr(tcx.types.i8);
+    let i8p = Ty::new_mut_ptr(tcx, tcx.types.i8);
     // `unsafe fn(*mut i8) -> ()`
-    let try_fn_ty = tcx.mk_fn_ptr(ty::Binder::dummy(tcx.mk_fn_sig(
+    let try_fn_ty = Ty::new_fn_ptr(tcx, ty::Binder::dummy(tcx.mk_fn_sig(
         iter::once(i8p),
-        tcx.mk_unit(),
+        Ty::new_unit(tcx),
         false,
         rustc_hir::Unsafety::Unsafe,
         Abi::Rust,
     )));
     // `unsafe fn(*mut i8, *mut i8) -> ()`
-    let catch_fn_ty = tcx.mk_fn_ptr(ty::Binder::dummy(tcx.mk_fn_sig(
+    let catch_fn_ty = Ty::new_fn_ptr(tcx, ty::Binder::dummy(tcx.mk_fn_sig(
         [i8p, i8p].iter().cloned(),
-        tcx.mk_unit(),
+        Ty::new_unit(tcx),
         false,
         rustc_hir::Unsafety::Unsafe,
         Abi::Rust,
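The rewritten `bit_reverse` above collapses four hand-unrolled ladders into one parameterized sequence: three mask-and-shift swaps reverse the bits inside each byte, and for widths above 8 a byte swap (`gcc_bswap`) then reverses the byte order. The same algorithm over plain integers, as a sketch:

    // Plain-integer version of the mask-and-shift bit reversal used above.
    fn bit_reverse_u64(value: u64, width: u32) -> u64 {
        let mask = if width == 64 { u64::MAX } else { (1u64 << width) - 1 };
        let m0 = 0x5555_5555_5555_5555 & mask;
        let m1 = 0x3333_3333_3333_3333 & mask;
        let m2 = 0x0f0f_0f0f_0f0f_0f0f & mask;

        // Swap adjacent bits, then pairs of bits, then nibbles: after these
        // three steps, the bits within every byte are reversed.
        let step1 = ((value >> 1) & m0) | ((value & m0) << 1);
        let step2 = ((step1 >> 2) & m1) | ((step1 & m1) << 2);
        let step3 = ((step2 >> 4) & m2) | ((step2 & m2) << 4);

        // For widths above 8, reverse the byte order too (`gcc_bswap` above).
        match width {
            8 => step3,
            16 => (step3 as u16).swap_bytes() as u64,
            32 => (step3 as u32).swap_bytes() as u64,
            64 => step3.swap_bytes(),
            _ => unreachable!("only power-of-two widths up to 64"),
        }
    }

    fn main() {
        assert_eq!(bit_reverse_u64(0b0000_0001, 8), 0b1000_0000);
        assert_eq!(bit_reverse_u64(1, 64), 1u64 << 63);
        assert_eq!(bit_reverse_u64(1, 32), 1u32.reverse_bits() as u64);
    }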
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
index b59c3a64f57..85d3e7234a0 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -1,11 +1,11 @@
-#[cfg(feature="master")]
-use gccjit::{ComparisonOp, UnaryOp};
 use gccjit::ToRValue;
 use gccjit::{BinaryOp, RValue, Type};
+#[cfg(feature = "master")]
+use gccjit::{ComparisonOp, UnaryOp};
 
 use rustc_codegen_ssa::base::compare_simd_types;
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use rustc_codegen_ssa::errors::ExpectedPointerMutability;
 use rustc_codegen_ssa::errors::InvalidMonomorphization;
 use rustc_codegen_ssa::mir::operand::OperandRef;
@@ -19,21 +19,8 @@ use rustc_span::{sym, Span, Symbol};
 use rustc_target::abi::Align;
 
 use crate::builder::Builder;
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use crate::context::CodegenCx;
-#[cfg(feature="master")]
-use crate::errors::{InvalidMonomorphizationExpectedSignedUnsigned, InvalidMonomorphizationInsertedType};
-use crate::errors::{
-    InvalidMonomorphizationExpectedSimd,
-    InvalidMonomorphizationInvalidBitmask,
-    InvalidMonomorphizationInvalidFloatVector, InvalidMonomorphizationMaskType,
-    InvalidMonomorphizationMismatchedLengths, InvalidMonomorphizationNotFloat,
-    InvalidMonomorphizationReturnElement, InvalidMonomorphizationReturnIntegerType,
-    InvalidMonomorphizationReturnLength, InvalidMonomorphizationReturnLengthInputType,
-    InvalidMonomorphizationReturnType, InvalidMonomorphizationSimdShuffle,
-    InvalidMonomorphizationUnrecognized, InvalidMonomorphizationUnsupportedElement,
-    InvalidMonomorphizationUnsupportedOperation,
-};
 
 pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     bx: &mut Builder<'a, 'gcc, 'tcx>,
@@ -59,16 +46,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         };
     }
     macro_rules! require_simd {
-        ($ty: expr, $position: expr) => {
-            require!(
-                $ty.is_simd(),
-                InvalidMonomorphizationExpectedSimd {
-                    span,
-                    name,
-                    position: $position,
-                    found_ty: $ty
-                }
-            )
+        ($ty: expr, $diag: expr) => {
+            require!($ty.is_simd(), $diag)
         };
     }
 
@@ -78,7 +57,10 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     let arg_tys = sig.inputs();
 
     if name == sym::simd_select_bitmask {
-        require_simd!(arg_tys[1], "argument");
+        require_simd!(
+            arg_tys[1],
+            InvalidMonomorphization::SimdArgument { span, name, ty: arg_tys[1] }
+        );
         let (len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
 
         let expected_int_bits = (len.max(8) - 1).next_power_of_two();
@@ -99,10 +81,10 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 let ptr = bx.pointercast(place.llval, bx.cx.type_ptr_to(int_ty));
                 bx.load(int_ty, ptr, Align::ONE)
             }
-            _ => return_error!(InvalidMonomorphizationInvalidBitmask {
+            _ => return_error!(InvalidMonomorphization::InvalidBitmask {
                 span,
                 name,
-                ty: mask_ty,
+                mask_ty,
                 expected_int_bits,
                 expected_bytes
             }),
@@ -116,7 +98,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         // NOTE: since the arguments can be vectors of floats, make sure the mask is a vector of
         // integer.
         let mask_element_type = bx.type_ix(arg1_element_type.get_size() as u64 * 8);
-        let vector_mask_type = bx.context.new_vector_type(mask_element_type, arg1_vector_type.get_num_units() as u64);
+        let vector_mask_type =
+            bx.context.new_vector_type(mask_element_type, arg1_vector_type.get_num_units() as u64);
 
         let mut elements = vec![];
         let one = bx.context.new_rvalue_one(mask.get_type());
@@ -131,7 +114,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     }
 
     // every intrinsic below takes a SIMD vector as its first argument
-    require_simd!(arg_tys[0], "input");
+    require_simd!(arg_tys[0], InvalidMonomorphization::SimdInput { span, name, ty: arg_tys[0] });
     let in_ty = arg_tys[0];
 
     let comparison = match name {
@@ -146,12 +129,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
     let (in_len, in_elem) = arg_tys[0].simd_size_and_type(bx.tcx());
     if let Some(cmp_op) = comparison {
-        require_simd!(ret_ty, "return");
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
 
         let (out_len, out_ty) = ret_ty.simd_size_and_type(bx.tcx());
         require!(
             in_len == out_len,
-            InvalidMonomorphizationReturnLengthInputType {
+            InvalidMonomorphization::ReturnLengthInputType {
                 span,
                 name,
                 in_len,
@@ -162,51 +145,42 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         );
         require!(
             bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer,
-            InvalidMonomorphizationReturnIntegerType { span, name, ret_ty, out_ty }
+            InvalidMonomorphization::ReturnIntegerType { span, name, ret_ty, out_ty }
         );
 
-        return Ok(compare_simd_types(
-            bx,
-            args[0].immediate(),
-            args[1].immediate(),
-            in_elem,
-            llret_ty,
-            cmp_op,
-        ));
+        let arg1 = args[0].immediate();
+        // NOTE: libgccjit can create distinct vector types for the same Rust vector type and
+        // does not compare them as equal, so bitcast the second operand to the type of the
+        // first.
+        // FIXME(antoyo): allow comparing vector types as equal in libgccjit.
+        let arg2 = bx.context.new_bitcast(None, args[1].immediate(), arg1.get_type());
+        return Ok(compare_simd_types(bx, arg1, arg2, in_elem, llret_ty, cmp_op));
     }
 
-    if let Some(stripped) = name.as_str().strip_prefix("simd_shuffle") {
-        let n: u64 = if stripped.is_empty() {
-            // Make sure this is actually an array, since typeck only checks the length-suffixed
-            // version of this intrinsic.
-            match args[2].layout.ty.kind() {
-                ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
-                    len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(
-                        || span_bug!(span, "could not evaluate shuffle index array length"),
-                    )
-                }
-                _ => return_error!(InvalidMonomorphizationSimdShuffle {
-                    span,
-                    name,
-                    ty: args[2].layout.ty
-                }),
+    if name == sym::simd_shuffle {
+        // Make sure this is actually an array, since typeck only checks the length-suffixed
+        // version of this intrinsic.
+        let n: u64 = match args[2].layout.ty.kind() {
+            ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
+                len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(
+                    || span_bug!(span, "could not evaluate shuffle index array length"),
+                )
             }
-        } else {
-            stripped.parse().unwrap_or_else(|_| {
-                span_bug!(span, "bad `simd_shuffle` instruction only caught in codegen?")
-            })
+            _ => return_error!(InvalidMonomorphization::SimdShuffle {
+                span,
+                name,
+                ty: args[2].layout.ty
+            }),
         };
-
-        require_simd!(ret_ty, "return");
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
 
         let (out_len, out_ty) = ret_ty.simd_size_and_type(bx.tcx());
         require!(
             out_len == n,
-            InvalidMonomorphizationReturnLength { span, name, in_len: n, ret_ty, out_len }
+            InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
         );
         require!(
             in_elem == out_ty,
-            InvalidMonomorphizationReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
+            InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
         );
 
         let vector = args[2].immediate();
@@ -218,7 +192,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     if name == sym::simd_insert {
         require!(
             in_elem == arg_tys[2],
-            InvalidMonomorphizationInsertedType { span, name, in_elem, in_ty, out_ty: arg_tys[2] }
+            InvalidMonomorphization::InsertedType {
+                span,
+                name,
+                in_elem,
+                in_ty,
+                out_ty: arg_tys[2]
+            }
         );
         let vector = args[0].immediate();
         let index = args[1].immediate();
@@ -235,7 +215,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     if name == sym::simd_extract {
         require!(
             ret_ty == in_elem,
-            InvalidMonomorphizationReturnType { span, name, in_elem, in_ty, ret_ty }
+            InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
         );
         let vector = args[0].immediate();
         return Ok(bx.context.new_vector_access(None, vector, args[1].immediate()).to_rvalue());
@@ -244,26 +224,29 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     if name == sym::simd_select {
         let m_elem_ty = in_elem;
         let m_len = in_len;
-        require_simd!(arg_tys[1], "argument");
+        require_simd!(
+            arg_tys[1],
+            InvalidMonomorphization::SimdArgument { span, name, ty: arg_tys[1] }
+        );
         let (v_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
         require!(
             m_len == v_len,
-            InvalidMonomorphizationMismatchedLengths { span, name, m_len, v_len }
+            InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
         );
         match m_elem_ty.kind() {
             ty::Int(_) => {}
-            _ => return_error!(InvalidMonomorphizationMaskType { span, name, ty: m_elem_ty }),
+            _ => return_error!(InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }),
         }
         return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
     }
 
-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
     if name == sym::simd_cast || name == sym::simd_as {
-        require_simd!(ret_ty, "return");
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
         let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
         require!(
             in_len == out_len,
-            InvalidMonomorphizationReturnLengthInputType {
+            InvalidMonomorphization::ReturnLengthInputType {
                 span,
                 name,
                 in_len,
@@ -283,19 +266,17 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             Unsupported,
         }
 
-        let in_style =
-            match in_elem.kind() {
-                ty::Int(_) | ty::Uint(_) => Style::Int,
-                ty::Float(_) => Style::Float,
-                 _ => Style::Unsupported,
-            };
+        let in_style = match in_elem.kind() {
+            ty::Int(_) | ty::Uint(_) => Style::Int,
+            ty::Float(_) => Style::Float,
+            _ => Style::Unsupported,
+        };
 
-        let out_style =
-            match out_elem.kind() {
-                ty::Int(_) | ty::Uint(_) => Style::Int,
-                ty::Float(_) => Style::Float,
-                 _ => Style::Unsupported,
-            };
+        let out_style = match out_elem.kind() {
+            ty::Int(_) | ty::Uint(_) => Style::Int,
+            ty::Float(_) => Style::Float,
+            _ => Style::Unsupported,
+        };
 
         match (in_style, out_style) {
             (Style::Unsupported, Style::Unsupported) => {
@@ -310,7 +291,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                         out_elem
                     }
                 );
-            },
+            }
             _ => return Ok(bx.context.convert_vector(None, args[0].immediate(), llret_ty)),
         }
     }
@@ -324,7 +305,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                     })*
                     _ => {},
                 }
-                return_error!(InvalidMonomorphizationUnsupportedOperation { span, name, in_ty, in_elem })
+                return_error!(InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem })
             })*
         }
     }
@@ -341,7 +322,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         // endian and MSB-first for big endian.
 
         let vector = args[0].immediate();
-        let vector_type = vector.get_type().dyncast_vector().expect("vector type");
+        // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+        let vector_type = vector.get_type().unqualified().dyncast_vector().expect("vector type");
         let elem_type = vector_type.get_element_type();
 
         let expected_int_bits = in_len.max(8);
@@ -357,10 +339,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
         let mut shift = 0;
         for i in 0..in_len {
-            let elem = bx.extract_element(vector, bx.context.new_rvalue_from_int(bx.int_type, i as i32));
+            let elem =
+                bx.extract_element(vector, bx.context.new_rvalue_from_int(bx.int_type, i as i32));
             let shifted = elem >> sign_shift;
             let masked = shifted & one;
-            result = result | (bx.context.new_cast(None, masked, result_type) << bx.context.new_rvalue_from_int(result_type, shift));
+            result = result
+                | (bx.context.new_cast(None, masked, result_type)
+                    << bx.context.new_rvalue_from_int(result_type, shift));
             shift += 1;
         }
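
The loop above ORs one sign bit per lane into an integer, lane 0 in the least-significant bit (the little-endian ordering mentioned in the comment). A scalar sketch of the same computation, assuming 32-bit lanes:

    // Scalar model of the bitmask loop: shift each lane's sign bit down
    // (sign_shift = 31 for 32-bit elements), mask it to one bit, and OR it
    // into the result at position `i`.
    fn bitmask4(lanes: [i32; 4]) -> u8 {
        let mut result = 0u8;
        for (i, lane) in lanes.iter().enumerate() {
            let sign_bit = ((lane >> 31) & 1) as u8;
            result |= sign_bit << i;
        }
        result
    }

    fn main() {
        // Lanes 0 and 2 are negative, so bits 0 and 2 are set.
        assert_eq!(bitmask4([-1, 0, -5, 7]), 0b0101);
    }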
 
@@ -409,46 +394,50 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 return Err(());
             }};
         }
-        let (elem_ty_str, elem_ty) =
-            if let ty::Float(f) = in_elem.kind() {
-                let elem_ty = bx.cx.type_float_from_ty(*f);
-                match f.bit_width() {
-                    32 => ("f", elem_ty),
-                    64 => ("", elem_ty),
-                    _ => {
-                        return_error!(InvalidMonomorphizationInvalidFloatVector { span, name, elem_ty: f.name_str(), vec_ty: in_ty });
-                    }
+        let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() {
+            let elem_ty = bx.cx.type_float_from_ty(*f);
+            match f.bit_width() {
+                32 => ("f", elem_ty),
+                64 => ("", elem_ty),
+                _ => {
+                    return_error!(InvalidMonomorphization::FloatingPointVector {
+                        span,
+                        name,
+                        f_ty: *f,
+                        in_ty
+                    });
                 }
             }
-            else {
-                return_error!(InvalidMonomorphizationNotFloat { span, name, ty: in_ty });
-            };
+        } else {
+            return_error!(InvalidMonomorphization::FloatingPointType { span, name, in_ty });
+        };
 
         let vec_ty = bx.cx.type_vector(elem_ty, in_len);
 
-        let intr_name =
-            match name {
-                sym::simd_ceil => "ceil",
-                sym::simd_fabs => "fabs", // TODO(antoyo): pand with 170141183420855150465331762880109871103
-                sym::simd_fcos => "cos",
-                sym::simd_fexp2 => "exp2",
-                sym::simd_fexp => "exp",
-                sym::simd_flog10 => "log10",
-                sym::simd_flog2 => "log2",
-                sym::simd_flog => "log",
-                sym::simd_floor => "floor",
-                sym::simd_fma => "fma",
-                sym::simd_fpowi => "__builtin_powi",
-                sym::simd_fpow => "pow",
-                sym::simd_fsin => "sin",
-                sym::simd_fsqrt => "sqrt",
-                sym::simd_round => "round",
-                sym::simd_trunc => "trunc",
-                _ => return_error!(InvalidMonomorphizationUnrecognized { span, name })
-            };
+        let intr_name = match name {
+            sym::simd_ceil => "ceil",
+            sym::simd_fabs => "fabs", // TODO(antoyo): pand with 170141183420855150465331762880109871103
+            sym::simd_fcos => "cos",
+            sym::simd_fexp2 => "exp2",
+            sym::simd_fexp => "exp",
+            sym::simd_flog10 => "log10",
+            sym::simd_flog2 => "log2",
+            sym::simd_flog => "log",
+            sym::simd_floor => "floor",
+            sym::simd_fma => "fma",
+            sym::simd_fpowi => "__builtin_powi",
+            sym::simd_fpow => "pow",
+            sym::simd_fsin => "sin",
+            sym::simd_fsqrt => "sqrt",
+            sym::simd_round => "round",
+            sym::simd_trunc => "trunc",
+            _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
+        };
         let builtin_name = format!("{}{}", intr_name, elem_ty_str);
         let funcs = bx.cx.functions.borrow();
-        let function = funcs.get(&builtin_name).unwrap_or_else(|| panic!("unable to find builtin function {}", builtin_name));
+        let function = funcs
+            .get(&builtin_name)
+            .unwrap_or_else(|| panic!("unable to find builtin function {}", builtin_name));
 
         // TODO(antoyo): add platform-specific behavior here for architectures that have these
         // intrinsics as instructions (for instance, gpus)
@@ -494,8 +483,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
     }
 
-    #[cfg(feature="master")]
-    fn vector_ty<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, elem_ty: Ty<'tcx>, vec_len: u64) -> Type<'gcc> {
+    #[cfg(feature = "master")]
+    fn vector_ty<'gcc, 'tcx>(
+        cx: &CodegenCx<'gcc, 'tcx>,
+        elem_ty: Ty<'tcx>,
+        vec_len: u64,
+    ) -> Type<'gcc> {
         // FIXME: use cx.layout_of(ty).llvm_type() ?
         let elem_ty = match *elem_ty.kind() {
             ty::Int(v) => cx.type_int_from_ty(v),
@@ -506,15 +499,22 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         cx.type_vector(elem_ty, vec_len)
     }
 
-    #[cfg(feature="master")]
-    fn gather<'a, 'gcc, 'tcx>(default: RValue<'gcc>, pointers: RValue<'gcc>, mask: RValue<'gcc>, pointer_count: usize, bx: &mut Builder<'a, 'gcc, 'tcx>, in_len: u64, underlying_ty: Ty<'tcx>, invert: bool) -> RValue<'gcc> {
-        let vector_type =
-            if pointer_count > 1 {
-                bx.context.new_vector_type(bx.usize_type, in_len)
-            }
-            else {
-                vector_ty(bx, underlying_ty, in_len)
-            };
+    #[cfg(feature = "master")]
+    fn gather<'a, 'gcc, 'tcx>(
+        default: RValue<'gcc>,
+        pointers: RValue<'gcc>,
+        mask: RValue<'gcc>,
+        pointer_count: usize,
+        bx: &mut Builder<'a, 'gcc, 'tcx>,
+        in_len: u64,
+        underlying_ty: Ty<'tcx>,
+        invert: bool,
+    ) -> RValue<'gcc> {
+        let vector_type = if pointer_count > 1 {
+            bx.context.new_vector_type(bx.usize_type, in_len)
+        } else {
+            vector_ty(bx, underlying_ty, in_len)
+        };
         let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();
 
         let mut values = vec![];
@@ -545,13 +545,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
         if invert {
             bx.shuffle_vector(vector, default, mask)
-        }
-        else {
+        } else {
             bx.shuffle_vector(default, vector, mask)
         }
     }
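
Judging from the shuffle at the end, this helper materializes the loaded lanes as a vector and then lets the mask pick between them and the default lanes, with `invert` swapping which source wins (which is what lets simd_scatter reuse it below). A plausible scalar model under those assumptions:

    // Hypothetical scalar model of `gather`: read every pointer lane, then
    // select per lane between the loaded value and the default; `invert`
    // flips the selection.
    fn gather4(default: [i32; 4], pointers: [&i32; 4], mask: [bool; 4], invert: bool) -> [i32; 4] {
        let mut out = [0; 4];
        for i in 0..4 {
            out[i] = if mask[i] != invert { *pointers[i] } else { default[i] };
        }
        out
    }

    fn main() {
        let (a, b) = (10, 20);
        let out = gather4([0; 4], [&a, &b, &a, &b], [true, false, true, false], false);
        assert_eq!(out, [10, 0, 10, 0]);
    }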
 
-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
     if name == sym::simd_gather {
         // simd_gather(values: <N x T>, pointers: <N x *_ T>,
         //             mask: <N x i{M}>) -> <N x T>
@@ -560,10 +559,16 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         // * M: any integer width is supported, will be truncated to i1
 
         // All types must be simd vector types
-        require_simd!(in_ty, "first");
-        require_simd!(arg_tys[1], "second");
-        require_simd!(arg_tys[2], "third");
-        require_simd!(ret_ty, "return");
+        require_simd!(in_ty, InvalidMonomorphization::SimdFirst { span, name, ty: in_ty });
+        require_simd!(
+            arg_tys[1],
+            InvalidMonomorphization::SimdSecond { span, name, ty: arg_tys[1] }
+        );
+        require_simd!(
+            arg_tys[2],
+            InvalidMonomorphization::SimdThird { span, name, ty: arg_tys[2] }
+        );
+        require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
 
         // Of the same length:
         let (out_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
@@ -656,10 +661,19 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             }
         }
 
-        return Ok(gather(args[0].immediate(), args[1].immediate(), args[2].immediate(), pointer_count, bx, in_len, underlying_ty, false));
+        return Ok(gather(
+            args[0].immediate(),
+            args[1].immediate(),
+            args[2].immediate(),
+            pointer_count,
+            bx,
+            in_len,
+            underlying_ty,
+            false,
+        ));
     }
 
-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
     if name == sym::simd_scatter {
         // simd_scatter(values: <N x T>, pointers: <N x *mut T>,
         //             mask: <N x i{M}>) -> ()
@@ -668,9 +682,15 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         // * M: any integer width is supported, will be truncated to i1
 
         // All types must be simd vector types
-        require_simd!(in_ty, "first");
-        require_simd!(arg_tys[1], "second");
-        require_simd!(arg_tys[2], "third");
+        require_simd!(in_ty, InvalidMonomorphization::SimdFirst { span, name, ty: in_ty });
+        require_simd!(
+            arg_tys[1],
+            InvalidMonomorphization::SimdSecond { span, name, ty: arg_tys[1] }
+        );
+        require_simd!(
+            arg_tys[2],
+            InvalidMonomorphization::SimdThird { span, name, ty: arg_tys[2] }
+        );
 
         // Of the same length:
         let (element_len1, _) = arg_tys[1].simd_size_and_type(bx.tcx());
@@ -759,17 +779,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             }
         }
 
-        let result = gather(args[0].immediate(), args[1].immediate(), args[2].immediate(), pointer_count, bx, in_len, underlying_ty, true);
+        let result = gather(
+            args[0].immediate(),
+            args[1].immediate(),
+            args[2].immediate(),
+            pointer_count,
+            bx,
+            in_len,
+            underlying_ty,
+            true,
+        );
 
         let pointers = args[1].immediate();
 
-        let vector_type =
-            if pointer_count > 1 {
-                bx.context.new_vector_type(bx.usize_type, in_len)
-            }
-            else {
-                vector_ty(bx, underlying_ty, in_len)
-            };
+        let vector_type = if pointer_count > 1 {
+            bx.context.new_vector_type(bx.usize_type, in_len)
+        } else {
+            vector_ty(bx, underlying_ty, in_len)
+        };
         let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();
 
         for i in 0..in_len {
@@ -809,7 +836,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                     })*
                     _ => {},
                 }
-                return_error!(InvalidMonomorphizationUnsupportedOperation { span, name, in_ty, in_elem })
+                return_error!(InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem })
             })*
         }
     }
@@ -824,89 +851,97 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         let rhs = args[1].immediate();
         let is_add = name == sym::simd_saturating_add;
         let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _;
-        let (signed, elem_width, elem_ty) =
-            match *in_elem.kind() {
-                ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_int_from_ty(i)),
-                ty::Uint(i) => (false, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_uint_from_ty(i)),
-                _ => {
-                return_error!(InvalidMonomorphizationExpectedSignedUnsigned {
+        let (signed, elem_width, elem_ty) = match *in_elem.kind() {
+            ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_int_from_ty(i)),
+            ty::Uint(i) => {
+                (false, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_uint_from_ty(i))
+            }
+            _ => {
+                return_error!(InvalidMonomorphization::ExpectedVectorElementType {
                     span,
                     name,
-                    elem_ty: arg_tys[0].simd_size_and_type(bx.tcx()).1,
-                    vec_ty: arg_tys[0],
+                    expected_element: arg_tys[0].simd_size_and_type(bx.tcx()).1,
+                    vector_type: arg_tys[0],
                 });
             }
         };
 
-        let result =
-            match (signed, is_add) {
-                (false, true) => {
-                    let res = lhs + rhs;
-                    let cmp = bx.context.new_comparison(None, ComparisonOp::LessThan, res, lhs);
-                    res | cmp
-                },
-                (true, true) => {
-                    // Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition
-                    // TODO(antoyo): improve using conditional operators if possible.
-                    let arg_type = lhs.get_type();
-                    // TODO(antoyo): convert lhs and rhs to unsigned.
-                    let sum = lhs + rhs;
-                    let vector_type = arg_type.dyncast_vector().expect("vector type");
-                    let unit = vector_type.get_num_units();
-                    let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
-                    let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
-
-                    let xor1 = lhs ^ rhs;
-                    let xor2 = lhs ^ sum;
-                    let and = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
-                    let mask = and >> width;
-
-                    let one = bx.context.new_rvalue_one(elem_ty);
-                    let ones = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
-                    let shift1 = ones << width;
-                    let shift2 = sum >> width;
-                    let mask_min = shift1 ^ shift2;
-
-                    let and1 = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
-                    let and2 = mask & mask_min;
-
-                    and1 + and2
-                },
-                (false, false) => {
-                    let res = lhs - rhs;
-                    let cmp = bx.context.new_comparison(None, ComparisonOp::LessThanEquals, res, lhs);
-                    res & cmp
-                },
-                (true, false) => {
-                    let arg_type = lhs.get_type();
-                    // TODO(antoyo): this uses the same algorithm from saturating add, but add the
-                    // negative of the right operand. Find a proper subtraction algorithm.
-                    let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);
-
-                    // TODO(antoyo): convert lhs and rhs to unsigned.
-                    let sum = lhs + rhs;
-                    let vector_type = arg_type.dyncast_vector().expect("vector type");
-                    let unit = vector_type.get_num_units();
-                    let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
-                    let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
-
-                    let xor1 = lhs ^ rhs;
-                    let xor2 = lhs ^ sum;
-                    let and = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
-                    let mask = and >> width;
-
-                    let one = bx.context.new_rvalue_one(elem_ty);
-                    let ones = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
-                    let shift1 = ones << width;
-                    let shift2 = sum >> width;
-                    let mask_min = shift1 ^ shift2;
-
-                    let and1 = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
-                    let and2 = mask & mask_min;
-
-                    and1 + and2
-                }
-            };
+        let result = match (signed, is_add) {
+            (false, true) => {
+                let res = lhs + rhs;
+                let cmp = bx.context.new_comparison(None, ComparisonOp::LessThan, res, lhs);
+                res | cmp
+            }
+            (true, true) => {
+                // Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition
+                // TODO(antoyo): improve using conditional operators if possible.
+                // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+                let arg_type = lhs.get_type().unqualified();
+                // TODO(antoyo): convert lhs and rhs to unsigned.
+                let sum = lhs + rhs;
+                let vector_type = arg_type.dyncast_vector().expect("vector type");
+                let unit = vector_type.get_num_units();
+                let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
+                let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
+
+                let xor1 = lhs ^ rhs;
+                let xor2 = lhs ^ sum;
+                let and =
+                    bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
+                let mask = and >> width;
+
+                let one = bx.context.new_rvalue_one(elem_ty);
+                let ones =
+                    bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
+                let shift1 = ones << width;
+                let shift2 = sum >> width;
+                let mask_min = shift1 ^ shift2;
+
+                let and1 =
+                    bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
+                let and2 = mask & mask_min;
+
+                and1 + and2
+            }
+            (false, false) => {
+                let res = lhs - rhs;
+                let cmp = bx.context.new_comparison(None, ComparisonOp::LessThanEquals, res, lhs);
+                res & cmp
+            }
+            (true, false) => {
+                // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+                let arg_type = lhs.get_type().unqualified();
+                // TODO(antoyo): this uses the same algorithm as saturating add, but adds the
+                // negative of the right operand. Find a proper subtraction algorithm.
+                let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);
+
+                // TODO(antoyo): convert lhs and rhs to unsigned.
+                let sum = lhs + rhs;
+                let vector_type = arg_type.dyncast_vector().expect("vector type");
+                let unit = vector_type.get_num_units();
+                let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
+                let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
+
+                let xor1 = lhs ^ rhs;
+                let xor2 = lhs ^ sum;
+                let and =
+                    bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
+                let mask = and >> width;
+
+                let one = bx.context.new_rvalue_one(elem_ty);
+                let ones =
+                    bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
+                let shift1 = ones << width;
+                let shift2 = sum >> width;
+                let mask_min = shift1 ^ shift2;
+
+                let and1 =
+                    bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
+                let and2 = mask & mask_min;
+
+                and1 + and2
+            }
+        };
 
         return Ok(result);
     }
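
A single-lane model makes the branch-free arithmetic above easier to follow. A sketch, not the backend's code; the vector version applies the same bit tricks to every lane at once:

    // Unsigned saturating add: on wrap-around, `res < lhs`; widen that bool
    // into an all-ones mask (the role of the vector comparison `cmp`) and OR
    // it in to clamp to the maximum.
    fn saturating_add_unsigned(lhs: u32, rhs: u32) -> u32 {
        let res = lhs.wrapping_add(rhs);
        let overflow_mask = 0u32.wrapping_sub((res < lhs) as u32);
        res | overflow_mask
    }

    // Unsigned saturating sub: keep the result only when no underflow
    // happened, mirroring the `res & cmp` arm above.
    fn saturating_sub_unsigned(lhs: u32, rhs: u32) -> u32 {
        let res = lhs.wrapping_sub(rhs);
        let no_underflow_mask = 0u32.wrapping_sub((res <= lhs) as u32);
        res & no_underflow_mask
    }

    // Signed saturating add, following the linked algorithm: overflow
    // happened iff the operands share a sign that differs from the sum's
    // sign; `saturated` is i32::MIN or i32::MAX depending on the direction,
    // matching `(ones << width) ^ (sum >> width)` above.
    fn saturating_add_signed(lhs: i32, rhs: i32) -> i32 {
        let sum = lhs.wrapping_add(rhs);
        let overflow = !(lhs ^ rhs) & (lhs ^ sum);
        let mask = overflow >> 31; // arithmetic shift: all-ones on overflow
        let saturated = i32::MIN ^ (sum >> 31);
        // The two terms are disjoint, so `|` here and the `+` above agree.
        (!mask & sum) | (mask & saturated)
    }

    fn main() {
        assert_eq!(saturating_add_unsigned(u32::MAX, 1), u32::MAX);
        assert_eq!(saturating_sub_unsigned(1, 2), 0);
        assert_eq!(saturating_add_signed(i32::MAX, 1), i32::MAX);
        assert_eq!(saturating_add_signed(i32::MIN, -1), i32::MIN);
    }
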
@@ -917,7 +952,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             if name == sym::$name {
                 require!(
                     ret_ty == in_elem,
-                    InvalidMonomorphizationReturnType { span, name, in_elem, in_ty, ret_ty }
+                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                 );
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
@@ -939,11 +974,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                             Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
                         }
                     }
-                    _ => return_error!(InvalidMonomorphizationUnsupportedElement {
+                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                         span,
                         name,
+                        symbol: sym::$name,
                         in_ty,
-                        elem_ty: in_elem,
+                        in_elem,
                         ret_ty
                     }),
                 };
@@ -980,18 +1016,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         1.0
     );
 
-
     macro_rules! minmax_red {
         ($name:ident: $int_red:ident, $float_red:ident) => {
             if name == sym::$name {
                 require!(
                     ret_ty == in_elem,
-                    InvalidMonomorphizationReturnType { span, name, in_elem, in_ty, ret_ty }
+                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                 );
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => Ok(bx.$int_red(args[0].immediate())),
                     ty::Float(_) => Ok(bx.$float_red(args[0].immediate())),
-                    _ => return_error!(InvalidMonomorphizationUnsupportedElement { span, name, in_ty, elem_ty: in_elem, ret_ty }),
+                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
+                        span,
+                        name,
+                        symbol: sym::$name,
+                        in_ty,
+                        in_elem,
+                        ret_ty
+                    }),
                 };
             }
         };
@@ -1009,17 +1051,18 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 let input = if !$boolean {
                     require!(
                         ret_ty == in_elem,
-                        InvalidMonomorphizationReturnType { span, name, in_elem, in_ty, ret_ty }
+                        InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                     );
                     args[0].immediate()
                 } else {
                     match in_elem.kind() {
                         ty::Int(_) | ty::Uint(_) => {}
-                        _ => return_error!(InvalidMonomorphizationUnsupportedElement {
+                        _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                             span,
                             name,
+                            symbol: sym::$name,
                             in_ty,
-                            elem_ty: in_elem,
+                            in_elem,
                             ret_ty
                         }),
                     }
@@ -1029,13 +1072,22 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
                         let r = bx.vector_reduce_op(input, $op);
-                        Ok(if !$boolean { r } else { bx.icmp(IntPredicate::IntNE, r, bx.context.new_rvalue_zero(r.get_type())) })
+                        Ok(if !$boolean {
+                            r
+                        } else {
+                            bx.icmp(
+                                IntPredicate::IntNE,
+                                r,
+                                bx.context.new_rvalue_zero(r.get_type()),
+                            )
+                        })
                     }
-                    _ => return_error!(InvalidMonomorphizationUnsupportedElement {
+                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                         span,
                         name,
+                        symbol: sym::$name,
                         in_ty,
-                        elem_ty: in_elem,
+                        in_elem,
                         ret_ty
                     }),
                 };
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index 1a20dbcebd4..9c18fc4a0dc 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -2,6 +2,12 @@
  * TODO(antoyo): implement equality in libgccjit based on https://zpz.github.io/blog/overloading-equality-operator-in-cpp-class-hierarchy/ (for type equality?)
  * TODO(antoyo): support #[inline] attributes.
  * TODO(antoyo): support LTO (gcc's equivalent to Full LTO is -flto -flto-partition=one — https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html).
+ * For Thin LTO, this might be helpful:
+ * In gcc 4.6, -fwhopr was removed and its behavior became the default with -flto. The non-whopr path can still be used via -flto-partition=none.
+ *
+ * Maybe some optimizations missing compared to rustc's LTO are listed there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
+ * For instance -fipa-icf (which should already be enabled) and maybe -fdevirtualize-at-ltrans.
+ * TODO: stop debug info from always being emitted; it might be slowing things down.
  *
  * TODO(antoyo): remove the patches.
  */
@@ -28,6 +34,7 @@ extern crate rustc_codegen_ssa;
 extern crate rustc_data_structures;
 extern crate rustc_errors;
 extern crate rustc_fluent_macro;
+extern crate rustc_fs_util;
 extern crate rustc_hir;
 extern crate rustc_macros;
 extern crate rustc_metadata;
@@ -35,7 +42,8 @@ extern crate rustc_middle;
 extern crate rustc_session;
 extern crate rustc_span;
 extern crate rustc_target;
-extern crate tempfile;
+#[macro_use]
+extern crate tracing;
 
 // This prevents duplicating functions and statics that are already part of the host rustc process.
 #[allow(unused_extern_crates)]
@@ -57,6 +65,7 @@ mod coverageinfo;
 mod debuginfo;
 mod declare;
 mod errors;
+mod gcc_util;
 mod int;
 mod intrinsic;
 mod mono_item;
@@ -64,30 +73,44 @@ mod type_;
 mod type_of;
 
 use std::any::Any;
-use std::sync::{Arc, Mutex};
-
-use crate::errors::LTONotSupported;
-use gccjit::{Context, OptimizationLevel, CType};
+use std::fmt::Debug;
+use std::sync::Arc;
+use std::sync::Mutex;
+#[cfg(not(feature="master"))]
+use std::sync::atomic::AtomicBool;
+#[cfg(not(feature="master"))]
+use std::sync::atomic::Ordering;
+
+use gccjit::{Context, OptimizationLevel};
+#[cfg(feature="master")]
+use gccjit::TargetInfo;
+#[cfg(not(feature="master"))]
+use gccjit::CType;
+use errors::LTONotSupported;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
 use rustc_codegen_ssa::base::codegen_crate;
-use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, ModuleConfig, TargetMachineFactoryFn};
+use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn};
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
 use rustc_codegen_ssa::target_features::supported_target_features;
-use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ModuleBufferMethods, ThinBufferMethods, WriteBackendMethods};
-use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::fx::FxIndexMap;
+use rustc_data_structures::sync::IntoDynSyncSend;
+use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ThinBufferMethods, WriteBackendMethods};
 use rustc_errors::{DiagnosticMessage, ErrorGuaranteed, Handler, SubdiagnosticMessage};
 use rustc_fluent_macro::fluent_messages;
 use rustc_metadata::EncodedMetadata;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
+use rustc_middle::util::Providers;
 use rustc_middle::ty::TyCtxt;
-use rustc_middle::ty::query::Providers;
 use rustc_session::config::{Lto, OptLevel, OutputFilenames};
 use rustc_session::Session;
 use rustc_span::Symbol;
 use rustc_span::fatal_error::FatalError;
 use tempfile::TempDir;
 
+use crate::back::lto::ModuleBuffer;
+use crate::gcc_util::target_cpu;
+
 fluent_messages! { "../messages.ftl" }
 
 pub struct PrintOnPanic<F: Fn() -> String>(pub F);
@@ -100,9 +123,47 @@ impl<F: Fn() -> String> Drop for PrintOnPanic<F> {
     }
 }
 
+#[cfg(not(feature="master"))]
+#[derive(Debug)]
+pub struct TargetInfo {
+    supports_128bit_integers: AtomicBool,
+}
+
+#[cfg(not(feature="master"))]
+impl TargetInfo {
+    fn cpu_supports(&self, _feature: &str) -> bool {
+        false
+    }
+
+    fn supports_128bit_int(&self) -> bool {
+        self.supports_128bit_integers.load(Ordering::SeqCst)
+    }
+}
+
+#[derive(Clone)]
+pub struct LockedTargetInfo {
+    info: Arc<Mutex<IntoDynSyncSend<TargetInfo>>>,
+}
+
+impl Debug for LockedTargetInfo {
+    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.info.lock().expect("lock").fmt(formatter)
+    }
+}
+
+impl LockedTargetInfo {
+    fn cpu_supports(&self, feature: &str) -> bool {
+        self.info.lock().expect("lock").cpu_supports(feature)
+    }
+
+    fn supports_128bit_int(&self) -> bool {
+        self.info.lock().expect("lock").supports_128bit_int()
+    }
+}
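
LockedTargetInfo exists because the underlying handle is not thread-safe by itself: the Mutex serializes access, and rustc's IntoDynSyncSend asserts Send/Sync on that basis. A minimal std-only sketch of the same pattern, with hypothetical names standing in for the gccjit types:

    use std::sync::{Arc, Mutex};

    // Hypothetical stand-in for a thread-unsafe FFI handle.
    struct RawTargetInfo {
        supports_128bit_integers: bool,
    }

    // Plays the role of IntoDynSyncSend: we promise the handle is only ever
    // touched while the Mutex is held.
    struct AssertSend<T>(T);
    unsafe impl<T> Send for AssertSend<T> {}

    #[derive(Clone)]
    struct SharedTargetInfo {
        info: Arc<Mutex<AssertSend<RawTargetInfo>>>,
    }

    impl SharedTargetInfo {
        fn supports_128bit_int(&self) -> bool {
            self.info.lock().expect("lock").0.supports_128bit_integers
        }
    }

    fn main() {
        let shared = SharedTargetInfo {
            info: Arc::new(Mutex::new(AssertSend(RawTargetInfo {
                supports_128bit_integers: false,
            }))),
        };
        let clone = shared.clone();
        std::thread::spawn(move || assert!(!clone.supports_128bit_int()))
            .join()
            .unwrap();
    }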
+
 #[derive(Clone)]
 pub struct GccCodegenBackend {
-    supports_128bit_integers: Arc<Mutex<bool>>,
+    target_info: LockedTargetInfo,
 }
 
 impl CodegenBackend for GccCodegenBackend {
@@ -111,23 +172,41 @@ impl CodegenBackend for GccCodegenBackend {
     }
 
     fn init(&self, sess: &Session) {
-        if sess.lto() != Lto::No {
+        #[cfg(feature="master")]
+        {
+            let target_cpu = target_cpu(sess);
+
+            // Get the second TargetInfo with the correct CPU features by setting the arch.
+            let context = Context::default();
+            if target_cpu != "generic" {
+                context.add_command_line_option(&format!("-march={}", target_cpu));
+            }
+
+            **self.target_info.info.lock().expect("lock") = context.get_target_info();
+        }
+
+        #[cfg(feature="master")]
+        gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
+        if sess.lto() == Lto::Thin {
             sess.emit_warning(LTONotSupported {});
         }
 
-        let temp_dir = TempDir::new().expect("cannot create temporary directory");
-        let temp_file = temp_dir.into_path().join("result.asm");
-        let check_context = Context::default();
-        check_context.set_print_errors_to_stderr(false);
-        let _int128_ty = check_context.new_c_type(CType::UInt128t);
-        // NOTE: we cannot just call compile() as this would require other files than libgccjit.so.
-        check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str"));
-        *self.supports_128bit_integers.lock().expect("lock") = check_context.get_last_error() == Ok(None);
+        #[cfg(not(feature="master"))]
+        {
+            let temp_dir = TempDir::new().expect("cannot create temporary directory");
+            let temp_file = temp_dir.into_path().join("result.asm");
+            let check_context = Context::default();
+            check_context.set_print_errors_to_stderr(false);
+            let _int128_ty = check_context.new_c_type(CType::UInt128t);
+            // NOTE: we cannot just call compile() as this would require files other than libgccjit.so.
+            check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str"));
+            self.target_info.info.lock().expect("lock").supports_128bit_integers.store(check_context.get_last_error() == Ok(None), Ordering::SeqCst);
+        }
     }
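
For reference, the non-master probe above reads as a self-contained helper. This sketch only rearranges calls already present in this hunk:

    // Creating a UInt128t in a throwaway context and compiling it to assembly
    // fails on targets without __int128, so a clean compile doubles as the
    // feature test.
    fn supports_128bit_integers() -> bool {
        let temp_dir = tempfile::TempDir::new().expect("cannot create temporary directory");
        let temp_file = temp_dir.into_path().join("result.asm");
        let check_context = gccjit::Context::default();
        check_context.set_print_errors_to_stderr(false);
        let _int128_ty = check_context.new_c_type(gccjit::CType::UInt128t);
        check_context
            .compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str"));
        check_context.get_last_error() == Ok(None)
    }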
 
     fn provide(&self, providers: &mut Providers) {
-        // FIXME(antoyo) compute list of enabled features from cli flags
-        providers.global_backend_features = |_tcx, ()| vec![];
+        providers.global_backend_features =
+            |tcx, ()| gcc_util::global_gcc_features(tcx.sess, true)
     }
 
     fn codegen_crate<'tcx>(&self, tcx: TyCtxt<'tcx>, metadata: EncodedMetadata, need_metadata_module: bool) -> Box<dyn Any> {
@@ -137,7 +216,7 @@ impl CodegenBackend for GccCodegenBackend {
         Box::new(res)
     }
 
-    fn join_codegen(&self, ongoing_codegen: Box<dyn Any>, sess: &Session, _outputs: &OutputFilenames) -> Result<(CodegenResults, FxHashMap<WorkProductId, WorkProduct>), ErrorGuaranteed> {
+    fn join_codegen(&self, ongoing_codegen: Box<dyn Any>, sess: &Session, _outputs: &OutputFilenames) -> Result<(CodegenResults, FxIndexMap<WorkProductId, WorkProduct>), ErrorGuaranteed> {
         let (codegen_results, work_products) = ongoing_codegen
             .downcast::<rustc_codegen_ssa::back::write::OngoingCodegen<GccCodegenBackend>>()
             .expect("Expected GccCodegenBackend's OngoingCodegen, found Box<Any>")
@@ -158,21 +237,26 @@ impl CodegenBackend for GccCodegenBackend {
     }
 
     fn target_features(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
-        target_features(sess, allow_unstable)
+        target_features(sess, allow_unstable, &self.target_info)
     }
 }
 
 impl ExtraBackendMethods for GccCodegenBackend {
-    fn codegen_allocator<'tcx>(&self, tcx: TyCtxt<'tcx>, module_name: &str, kind: AllocatorKind) -> Self::Module {
+    fn codegen_allocator<'tcx>(&self, tcx: TyCtxt<'tcx>, module_name: &str, kind: AllocatorKind, alloc_error_handler_kind: AllocatorKind) -> Self::Module {
         let mut mods = GccContext {
             context: Context::default(),
+            should_combine_object_files: false,
+            temp_dir: None,
         };
-        unsafe { allocator::codegen(tcx, &mut mods, module_name, kind); }
+
+        // TODO(antoyo): only set for x86.
+        mods.context.add_command_line_option("-masm=intel");
+        unsafe { allocator::codegen(tcx, &mut mods, module_name, kind, alloc_error_handler_kind); }
         mods
     }
 
     fn compile_codegen_unit(&self, tcx: TyCtxt<'_>, cgu_name: Symbol) -> (ModuleCodegen<Self::Module>, u64) {
-        base::compile_codegen_unit(tcx, cgu_name, *self.supports_128bit_integers.lock().expect("lock"))
+        base::compile_codegen_unit(tcx, cgu_name, self.target_info.clone())
     }
 
     fn target_machine_factory(&self, _sess: &Session, _opt_level: OptLevel, _features: &[String]) -> TargetMachineFactoryFn<Self> {
@@ -183,14 +267,6 @@ impl ExtraBackendMethods for GccCodegenBackend {
     }
 }
 
-pub struct ModuleBuffer;
-
-impl ModuleBufferMethods for ModuleBuffer {
-    fn data(&self) -> &[u8] {
-        unimplemented!();
-    }
-}
-
 pub struct ThinBuffer;
 
 impl ThinBufferMethods for ThinBuffer {
@@ -201,6 +277,9 @@ impl ThinBufferMethods for ThinBuffer {
 
 pub struct GccContext {
     context: Context<'static>,
+    should_combine_object_files: bool,
+    // Temporary directory used by LTO. We keep it here so that it's not removed before linking.
+    temp_dir: Option<TempDir>,
 }
 
 unsafe impl Send for GccContext {}
@@ -215,18 +294,8 @@ impl WriteBackendMethods for GccCodegenBackend {
     type ThinData = ();
     type ThinBuffer = ThinBuffer;
 
-    fn run_fat_lto(_cgcx: &CodegenContext<Self>, mut modules: Vec<FatLTOInput<Self>>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> {
-        // TODO(antoyo): implement LTO by sending -flto to libgccjit and adding the appropriate gcc linker plugins.
-        // NOTE: implemented elsewhere.
-        // TODO(antoyo): what is implemented elsewhere ^ ?
-        let module =
-            match modules.remove(0) {
-                FatLTOInput::InMemory(module) => module,
-                FatLTOInput::Serialized { .. } => {
-                    unimplemented!();
-                }
-            };
-        Ok(LtoModuleCodegen::Fat { module, _serialized_bitcode: vec![] })
+    fn run_fat_lto(cgcx: &CodegenContext<Self>, modules: Vec<FatLtoInput<Self>>, cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> {
+        back::lto::run_fat(cgcx, modules, cached_modules)
     }
 
     fn run_thin_lto(_cgcx: &CodegenContext<Self>, _modules: Vec<(String, Self::ThinBuffer)>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
@@ -237,6 +306,10 @@ impl WriteBackendMethods for GccCodegenBackend {
         unimplemented!();
     }
 
+    fn print_statistics(&self) {
+        unimplemented!()
+    }
+
     unsafe fn optimize(_cgcx: &CodegenContext<Self>, _diag_handler: &Handler, module: &ModuleCodegen<Self::Module>, config: &ModuleConfig) -> Result<(), FatalError> {
         module.module_llvm.context.set_optimization_level(to_gcc_opt_level(config.opt_level));
         Ok(())
@@ -271,8 +344,19 @@ impl WriteBackendMethods for GccCodegenBackend {
 /// This is the entrypoint for a hot plugged rustc_codegen_gccjit
 #[no_mangle]
 pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
+    #[cfg(feature="master")]
+    let info = {
+        // Check whether the target supports 128-bit integers.
+        let context = Context::default();
+        Arc::new(Mutex::new(IntoDynSyncSend(context.get_target_info())))
+    };
+    #[cfg(not(feature="master"))]
+    let info = Arc::new(Mutex::new(IntoDynSyncSend(TargetInfo {
+        supports_128bit_integers: AtomicBool::new(false),
+    })));
+
     Box::new(GccCodegenBackend {
-        supports_128bit_integers: Arc::new(Mutex::new(false)),
+        target_info: LockedTargetInfo { info },
     })
 }
 
@@ -291,22 +375,7 @@ fn to_gcc_opt_level(optlevel: Option<OptLevel>) -> OptimizationLevel {
     }
 }
 
-fn handle_native(name: &str) -> &str {
-    if name != "native" {
-        return name;
-    }
-
-    unimplemented!();
-}
-
-pub fn target_cpu(sess: &Session) -> &str {
-    match sess.opts.cg.target_cpu {
-        Some(ref name) => handle_native(name),
-        None => handle_native(sess.target.cpu.as_ref()),
-    }
-}
-
-pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
+pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &LockedTargetInfo) -> Vec<Symbol> {
     supported_target_features(sess)
         .iter()
         .filter_map(
@@ -315,26 +384,13 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
             },
         )
         .filter(|_feature| {
-            // TODO(antoyo): implement a way to get enabled feature in libgccjit.
-            // Probably using the equivalent of __builtin_cpu_supports.
-            // TODO(antoyo): maybe use whatever outputs the following command:
-            // gcc -march=native -Q --help=target
-            #[cfg(feature="master")]
-            {
-                // NOTE: the CPU in the CI doesn't support sse4a, so disable it to make the stdarch tests pass in the CI.
-                (_feature.contains("sse") || _feature.contains("avx")) && !_feature.contains("avx512") && !_feature.contains("sse4a")
-            }
-            #[cfg(not(feature="master"))]
-            {
-                false
-            }
+            target_info.cpu_supports(_feature)
             /*
                adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512ifma,
                avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
                bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
                sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
              */
-            //false
         })
         .map(|feature| Symbol::intern(feature))
         .collect()
diff --git a/compiler/rustc_codegen_gcc/src/mono_item.rs b/compiler/rustc_codegen_gcc/src/mono_item.rs
index c1f6340866c..3322d56513b 100644
--- a/compiler/rustc_codegen_gcc/src/mono_item.rs
+++ b/compiler/rustc_codegen_gcc/src/mono_item.rs
@@ -31,7 +31,7 @@ impl<'gcc, 'tcx> PreDefineMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
 
     #[cfg_attr(not(feature="master"), allow(unused_variables))]
     fn predefine_fn(&self, instance: Instance<'tcx>, linkage: Linkage, visibility: Visibility, symbol_name: &str) {
-        assert!(!instance.substs.needs_infer());
+        assert!(!instance.args.has_infer());
 
         let fn_abi = self.fn_abi_of_instance(instance, ty::List::empty());
         self.linkage.set(base::linkage_to_gcc(linkage));
diff --git a/compiler/rustc_codegen_gcc/src/type_.rs b/compiler/rustc_codegen_gcc/src/type_.rs
index daa661f35c4..31899740514 100644
--- a/compiler/rustc_codegen_gcc/src/type_.rs
+++ b/compiler/rustc_codegen_gcc/src/type_.rs
@@ -54,6 +54,23 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
         self.u128_type
     }
 
+    pub fn type_ptr_to(&self, ty: Type<'gcc>) -> Type<'gcc> {
+        ty.make_pointer()
+    }
+
+    pub fn type_ptr_to_ext(&self, ty: Type<'gcc>, _address_space: AddressSpace) -> Type<'gcc> {
+        // TODO(antoyo): use address_space, perhaps with TYPE_ADDR_SPACE?
+        ty.make_pointer()
+    }
+
+    pub fn type_i8p(&self) -> Type<'gcc> {
+        self.type_ptr_to(self.type_i8())
+    }
+
+    pub fn type_i8p_ext(&self, address_space: AddressSpace) -> Type<'gcc> {
+        self.type_ptr_to_ext(self.type_i8(), address_space)
+    }
+
     pub fn type_pointee_for_align(&self, align: Align) -> Type<'gcc> {
         // FIXME(eddyb) We could find a better approximation if ity.align < align.
         let ity = Integer::approximate_align(self, align);
@@ -149,13 +166,12 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         }
     }
 
-    fn type_ptr_to(&self, ty: Type<'gcc>) -> Type<'gcc> {
-        ty.make_pointer()
+    fn type_ptr(&self) -> Type<'gcc> {
+        self.type_ptr_to(self.type_void())
     }
 
-    fn type_ptr_to_ext(&self, ty: Type<'gcc>, _address_space: AddressSpace) -> Type<'gcc> {
-        // TODO(antoyo): use address_space, perhaps with TYPE_ADDR_SPACE?
-        ty.make_pointer()
+    fn type_ptr_ext(&self, address_space: AddressSpace) -> Type<'gcc> {
+        self.type_ptr_to_ext(self.type_void(), address_space)
     }
 
     fn element_type(&self, ty: Type<'gcc>) -> Type<'gcc> {
@@ -280,16 +296,4 @@ pub fn struct_fields<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout
 }
 
 impl<'gcc, 'tcx> TypeMembershipMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
-    fn set_type_metadata(&self, _function: RValue<'gcc>, _typeid: String) {
-        // Unsupported.
-    }
-
-    fn typeid_metadata(&self, _typeid: String) -> RValue<'gcc> {
-        // Unsupported.
-        self.context.new_rvalue_from_int(self.int_type, 0)
-    }
-
-    fn set_kcfi_type_metadata(&self, _function: RValue<'gcc>, _kcfi_typeid: u32) {
-        // Unsupported.
-    }
 }
diff --git a/compiler/rustc_codegen_gcc/src/type_of.rs b/compiler/rustc_codegen_gcc/src/type_of.rs
index 5df8c1a209d..c2eab295acd 100644
--- a/compiler/rustc_codegen_gcc/src/type_of.rs
+++ b/compiler/rustc_codegen_gcc/src/type_of.rs
@@ -4,7 +4,7 @@ use gccjit::{Struct, Type};
 use crate::rustc_codegen_ssa::traits::{BaseTypeMethods, DerivedTypeMethods, LayoutTypeMethods};
 use rustc_middle::bug;
 use rustc_middle::ty::{self, Ty, TypeVisitableExt};
-use rustc_middle::ty::layout::{FnAbiOf, LayoutOf, TyAndLayout};
+use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
 use rustc_middle::ty::print::with_no_trimmed_paths;
 use rustc_target::abi::{self, Abi, Align, F32, F64, FieldsShape, Int, Integer, Pointer, PointeeInfo, Size, TyAbiInterface, Variants};
 use rustc_target::abi::call::{CastTarget, FnAbi, Reg};
@@ -74,8 +74,8 @@ fn uncached_gcc_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout
         Abi::ScalarPair(..) => {
             return cx.type_struct(
                 &[
-                    layout.scalar_pair_element_gcc_type(cx, 0, false),
-                    layout.scalar_pair_element_gcc_type(cx, 1, false),
+                    layout.scalar_pair_element_gcc_type(cx, 0),
+                    layout.scalar_pair_element_gcc_type(cx, 1),
                 ],
                 false,
             );
@@ -101,7 +101,7 @@ fn uncached_gcc_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout
             if let (&ty::Generator(_, _, _), &Variants::Single { index }) =
                 (layout.ty.kind(), &layout.variants)
             {
-                write!(&mut name, "::{}", ty::GeneratorSubsts::variant_name(index)).unwrap();
+                write!(&mut name, "::{}", ty::GeneratorArgs::variant_name(index)).unwrap();
             }
             Some(name)
         }
@@ -150,7 +150,7 @@ pub trait LayoutGccExt<'tcx> {
     fn gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc>;
     fn immediate_gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc>;
     fn scalar_gcc_type_at<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>, scalar: &abi::Scalar, offset: Size) -> Type<'gcc>;
-    fn scalar_pair_element_gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>, index: usize, immediate: bool) -> Type<'gcc>;
+    fn scalar_pair_element_gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>, index: usize) -> Type<'gcc>;
     fn gcc_field_index(&self, index: usize) -> u64;
     fn pointee_info_at<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>, offset: Size) -> Option<PointeeInfo>;
 }
@@ -159,8 +159,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
     fn is_gcc_immediate(&self) -> bool {
         match self.abi {
             Abi::Scalar(_) | Abi::Vector { .. } => true,
-            Abi::ScalarPair(..) => false,
-            Abi::Uninhabited | Abi::Aggregate { .. } => self.is_zst(),
+            Abi::ScalarPair(..) | Abi::Uninhabited | Abi::Aggregate { .. } => false,
         }
     }
 
@@ -183,6 +182,10 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
     /// of that field's type - this is useful for taking the address of
     /// that field and ensuring the struct has the right alignment.
     fn gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc> {
+        use crate::rustc_middle::ty::layout::FnAbiOf;
+        // This must produce the same result for `repr(transparent)` wrappers as for the inner type!
+        // In other words, this should generally not look at the type at all, but only at the
+        // layout.
         if let Abi::Scalar(ref scalar) = self.abi {
             // Use a different cache for scalars because pointers to DSTs
             // can be either fat or thin (data pointers of fat pointers).
@@ -191,12 +194,9 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
             }
             let ty =
                 match *self.ty.kind() {
-                    ty::Ref(_, ty, _) | ty::RawPtr(ty::TypeAndMut { ty, .. }) => {
-                        cx.type_ptr_to(cx.layout_of(ty).gcc_type(cx))
-                    }
-                    ty::Adt(def, _) if def.is_box() => {
-                        cx.type_ptr_to(cx.layout_of(self.ty.boxed_ty()).gcc_type(cx))
-                    }
+                    // NOTE: we cannot remove this match like in the LLVM codegen because the call
+                    // to fn_ptr_backend_type handles the on-stack attribute.
+                    // TODO(antoyo): find a less hackish way to handle the on-stack attribute.
                     ty::FnPtr(sig) => cx.fn_ptr_backend_type(&cx.fn_abi_of_fn_ptr(sig, ty::List::empty())),
                     _ => self.scalar_gcc_type_at(cx, scalar, Size::ZERO),
                 };
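
The invariant in the new comment can be made concrete: a #[repr(transparent)] wrapper has exactly the layout of its field, so a lowering driven by layout alone necessarily maps both to the same gcc type. A tiny self-contained illustration with a hypothetical wrapper:

    // `Meters` and `f64` have identical layout, so gcc_type must produce the
    // same backend type for both.
    #[repr(transparent)]
    struct Meters(f64);

    fn main() {
        assert_eq!(std::mem::size_of::<Meters>(), std::mem::size_of::<f64>());
        assert_eq!(std::mem::align_of::<Meters>(), std::mem::align_of::<f64>());
    }
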
@@ -273,23 +273,10 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
         }
     }
 
-    fn scalar_pair_element_gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>, index: usize, immediate: bool) -> Type<'gcc> {
-        // TODO(antoyo): remove llvm hack:
-        // HACK(eddyb) special-case fat pointers until LLVM removes
-        // pointee types, to avoid bitcasting every `OperandRef::deref`.
-        match self.ty.kind() {
-            ty::Ref(..) | ty::RawPtr(_) => {
-                return self.field(cx, index).gcc_type(cx);
-            }
-            // only wide pointer boxes are handled as pointers
-            // thin pointer boxes with scalar allocators are handled by the general logic below
-            ty::Adt(def, substs) if def.is_box() && cx.layout_of(substs.type_at(1)).is_zst() => {
-                let ptr_ty = cx.tcx.mk_mut_ptr(self.ty.boxed_ty());
-                return cx.layout_of(ptr_ty).scalar_pair_element_gcc_type(cx, index, immediate);
-            }
-            _ => {}
-        }
-
+    fn scalar_pair_element_gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>, index: usize) -> Type<'gcc> {
+        // This must produce the same result for `repr(transparent)` wrappers as for the inner type!
+        // In other words, this should generally not look at the type at all, but only at the
+        // layout.
         let (a, b) = match self.abi {
             Abi::ScalarPair(ref a, ref b) => (a, b),
             _ => bug!("TyAndLayout::scalar_pair_element_llty({:?}): not applicable", self),
@@ -368,8 +355,8 @@ impl<'gcc, 'tcx> LayoutTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         layout.gcc_field_index(index)
     }
 
-    fn scalar_pair_element_backend_type(&self, layout: TyAndLayout<'tcx>, index: usize, immediate: bool) -> Type<'gcc> {
-        layout.scalar_pair_element_gcc_type(self, index, immediate)
+    fn scalar_pair_element_backend_type(&self, layout: TyAndLayout<'tcx>, index: usize, _immediate: bool) -> Type<'gcc> {
+        layout.scalar_pair_element_gcc_type(self, index)
     }
 
     fn cast_backend_type(&self, ty: &CastTarget) -> Type<'gcc> {
@@ -384,8 +371,8 @@ impl<'gcc, 'tcx> LayoutTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         unimplemented!();
     }
 
-    fn fn_decl_backend_type(&self, _fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> Type<'gcc> {
-        // FIXME(antoyo): return correct type.
-        self.type_void()
+    fn fn_decl_backend_type(&self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> Type<'gcc> {
+        let (return_type, param_types, variadic, _) = fn_abi.gcc_type(self);
+        self.context.new_function_pointer_type(None, return_type, &param_types, variadic)
     }
 }