Merge commit '11a0cceab966e5ff1058ddbcab5977e8a1d6d290' into subtree-update_cg_gcc_2023-10-09

author: Antoni Boucher <bouanto@zoho.com> 2023-10-09 15:53:34 -0400
committer: Antoni Boucher <bouanto@zoho.com> 2023-10-09 15:53:34 -0400
commit: 30290c8b411a837b2e19293391956f5f779608ab (patch)
tree: 0a59bbb2fda11bca09f135f56ea3025c5daea590 /compiler/rustc_codegen_gcc/src
parent: cdddcd3bea35049dab56888d4391cb9b5b1b4491 (diff)
parent: 11a0cceab966e5ff1058ddbcab5977e8a1d6d290 (diff)
download: rust-30290c8b411a837b2e19293391956f5f779608ab.tar.gz
rust-30290c8b411a837b2e19293391956f5f779608ab.zip
18 files changed, 1275 insertions, 291 deletions
diff --git a/compiler/rustc_codegen_gcc/src/abi.rs b/compiler/rustc_codegen_gcc/src/abi.rs
index a49530ebb4c..35bb0b6e5f4 100644
--- a/compiler/rustc_codegen_gcc/src/abi.rs
+++ b/compiler/rustc_codegen_gcc/src/abi.rs
@@ -3,7 +3,9 @@ use rustc_codegen_ssa::traits::{AbiBuilderMethods, BaseTypeMethods};
 use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::bug;
 use rustc_middle::ty::Ty;
-use rustc_target::abi::call::{CastTarget, FnAbi, PassMode, Reg, RegKind};
+#[cfg(feature = "master")]
+use rustc_session::config;
+use rustc_target::abi::call::{ArgAttributes, CastTarget, FnAbi, PassMode, Reg, RegKind};
 
 use crate::builder::Builder;
 use crate::context::CodegenCx;
@@ -120,30 +122,50 @@ impl<'gcc, 'tcx> FnAbiGccExt<'gcc, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 }
             };
 
+        #[cfg(feature = "master")]
+        let apply_attrs = |ty: Type<'gcc>, attrs: &ArgAttributes| {
+            if cx.sess().opts.optimize != config::OptLevel::No
+                && attrs.regular.contains(rustc_target::abi::call::ArgAttribute::NoAlias)
+            {
+                ty.make_restrict()
+            } else {
+                ty
+            }
+        };
+        #[cfg(not(feature = "master"))]
+        let apply_attrs = |ty: Type<'gcc>, _attrs: &ArgAttributes| {
+            ty
+        };
+
         for arg in self.args.iter() {
             let arg_ty = match arg.mode {
                 PassMode::Ignore => continue,
-                PassMode::Direct(_) => arg.layout.immediate_gcc_type(cx),
-                PassMode::Pair(..) => {
-                    argument_tys.push(arg.layout.scalar_pair_element_gcc_type(cx, 0));
-                    argument_tys.push(arg.layout.scalar_pair_element_gcc_type(cx, 1));
+                PassMode::Pair(a, b) => {
+                    argument_tys.push(apply_attrs(arg.layout.scalar_pair_element_gcc_type(cx, 0), &a));
+                    argument_tys.push(apply_attrs(arg.layout.scalar_pair_element_gcc_type(cx, 1), &b));
                     continue;
                 }
-                PassMode::Indirect { meta_attrs: Some(_), .. } => {
-                    unimplemented!();
-                }
                 PassMode::Cast { ref cast, pad_i32 } => {
                     // add padding
                     if pad_i32 {
                         argument_tys.push(Reg::i32().gcc_type(cx));
                     }
-                    cast.gcc_type(cx)
+                    let ty = cast.gcc_type(cx);
+                    apply_attrs(ty, &cast.attrs)
                 }
-                PassMode::Indirect { meta_attrs: None, on_stack: true, .. } => {
+                PassMode::Indirect { attrs: _, meta_attrs: None, on_stack: true } => {
+                    // This is a "byval" argument, so we don't apply the `restrict` attribute on it.
                     on_stack_param_indices.insert(argument_tys.len());
                     arg.memory_ty(cx)
                 },
-                PassMode::Indirect { meta_attrs: None, on_stack: false, .. } => cx.type_ptr_to(arg.memory_ty(cx)),
+                PassMode::Direct(attrs) => apply_attrs(arg.layout.immediate_gcc_type(cx), &attrs),
+                PassMode::Indirect { attrs, meta_attrs: None, on_stack: false } => {
+                    apply_attrs(cx.type_ptr_to(arg.memory_ty(cx)), &attrs)
+                }
+                PassMode::Indirect { attrs, meta_attrs: Some(meta_attrs), on_stack } => {
+                    assert!(!on_stack);
+                    apply_attrs(apply_attrs(cx.type_ptr_to(arg.memory_ty(cx)), &attrs), &meta_attrs)
+                }
             };
             argument_tys.push(arg_ty);
         }
diff --git a/compiler/rustc_codegen_gcc/src/allocator.rs b/compiler/rustc_codegen_gcc/src/allocator.rs
index edd7ab722f6..c8c098e2973 100644
--- a/compiler/rustc_codegen_gcc/src/allocator.rs
+++ b/compiler/rustc_codegen_gcc/src/allocator.rs
@@ -1,6 +1,6 @@
 #[cfg(feature="master")]
 use gccjit::FnAttribute;
-use gccjit::{FunctionType, GlobalKind, ToRValue};
+use gccjit::{Context, FunctionType, GlobalKind, ToRValue, Type};
 use rustc_ast::expand::allocator::{
     alloc_error_handler_name, default_fn_name, global_fn_name, AllocatorKind, AllocatorTy,
     ALLOCATOR_METHODS, NO_ALLOC_SHIM_IS_UNSTABLE,
@@ -22,7 +22,6 @@ pub(crate) unsafe fn codegen(tcx: TyCtxt<'_>, mods: &mut GccContext, _module_nam
         };
     let i8 = context.new_type::<i8>();
     let i8p = i8.make_pointer();
-    let void = context.new_type::<()>();
 
     if kind == AllocatorKind::Default {
         for method in ALLOCATOR_METHODS {
@@ -47,67 +46,62 @@ pub(crate) unsafe fn codegen(tcx: TyCtxt<'_>, mods: &mut GccContext, _module_nam
                     panic!("invalid allocator output")
                 }
             };
-            let name = global_fn_name(method.name);
+            let from_name = global_fn_name(method.name);
+            let to_name = default_fn_name(method.name);
 
-            let args: Vec<_> = types.iter().enumerate()
-                .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index)))
-                .collect();
-            let func = context.new_function(None, FunctionType::Exported, output.unwrap_or(void), &args, name, false);
+            create_wrapper_function(tcx, context, &from_name, &to_name, &types, output);
+        }
+    }
 
-            if tcx.sess.target.options.default_hidden_visibility {
-                #[cfg(feature="master")]
-                func.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden));
-            }
-            if tcx.sess.must_emit_unwind_tables() {
-                // TODO(antoyo): emit unwind tables.
-            }
+    // FIXME(bjorn3): Add noreturn attribute
+    create_wrapper_function(
+        tcx,
+        context,
+        "__rust_alloc_error_handler",
+        &alloc_error_handler_name(alloc_error_handler_kind),
+        &[usize, usize],
+        None,
+    );
 
-            let callee = default_fn_name(method.name);
-            let args: Vec<_> = types.iter().enumerate()
-                .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index)))
-                .collect();
-            let callee = context.new_function(None, FunctionType::Extern, output.unwrap_or(void), &args, callee, false);
-            #[cfg(feature="master")]
-            callee.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden));
-
-            let block = func.new_block("entry");
-
-            let args = args
-                .iter()
-                .enumerate()
-                .map(|(i, _)| func.get_param(i as i32).to_rvalue())
-                .collect::<Vec<_>>();
-            let ret = context.new_call(None, callee, &args);
-            //llvm::LLVMSetTailCall(ret, True);
-            if output.is_some() {
-                block.end_with_return(None, ret);
-            }
-            else {
-                block.end_with_void_return(None);
-            }
+    let name = OomStrategy::SYMBOL.to_string();
+    let global = context.new_global(None, GlobalKind::Exported, i8, name);
+    let value = tcx.sess.opts.unstable_opts.oom.should_panic();
+    let value = context.new_rvalue_from_int(i8, value as i32);
+    global.global_set_initializer_rvalue(value);
 
-            // TODO(@Commeownist): Check if we need to emit some extra debugging info in certain circumstances
-            // as described in https://github.com/rust-lang/rust/commit/77a96ed5646f7c3ee8897693decc4626fe380643
-        }
-    }
+    let name = NO_ALLOC_SHIM_IS_UNSTABLE.to_string();
+    let global = context.new_global(None, GlobalKind::Exported, i8, name);
+    let value = context.new_rvalue_from_int(i8, 0);
+    global.global_set_initializer_rvalue(value);
+}
+
+fn create_wrapper_function(
+    tcx: TyCtxt<'_>,
+    context: &Context<'_>,
+    from_name: &str,
+    to_name: &str,
+    types: &[Type<'_>],
+    output: Option<Type<'_>>,
+) {
+    let void = context.new_type::<()>();
 
-    let types = [usize, usize];
-    let name = "__rust_alloc_error_handler".to_string();
     let args: Vec<_> = types.iter().enumerate()
         .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index)))
         .collect();
-    let func = context.new_function(None, FunctionType::Exported, void, &args, name, false);
+    let func = context.new_function(None, FunctionType::Exported, output.unwrap_or(void), &args, from_name, false);
 
-    if tcx.sess.target.default_hidden_visibility {
+    if tcx.sess.target.options.default_hidden_visibility {
         #[cfg(feature="master")]
         func.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden));
     }
+    if tcx.sess.must_emit_unwind_tables() {
+        // TODO(antoyo): emit unwind tables.
+    }
 
-    let callee = alloc_error_handler_name(alloc_error_handler_kind);
     let args: Vec<_> = types.iter().enumerate()
         .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index)))
         .collect();
-    let callee = context.new_function(None, FunctionType::Extern, void, &args, callee, false);
+    let callee = context.new_function(None, FunctionType::Extern, output.unwrap_or(void), &args, to_name, false);
     #[cfg(feature="master")]
     callee.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden));
 
@@ -118,18 +112,15 @@ pub(crate) unsafe fn codegen(tcx: TyCtxt<'_>, mods: &mut GccContext, _module_nam
         .enumerate()
         .map(|(i, _)| func.get_param(i as i32).to_rvalue())
         .collect::<Vec<_>>();
-    let _ret = context.new_call(None, callee, &args);
+    let ret = context.new_call(None, callee, &args);
     //llvm::LLVMSetTailCall(ret, True);
-    block.end_with_void_return(None);
-
-    let name = OomStrategy::SYMBOL.to_string();
-    let global = context.new_global(None, GlobalKind::Exported, i8, name);
-    let value = tcx.sess.opts.unstable_opts.oom.should_panic();
-    let value = context.new_rvalue_from_int(i8, value as i32);
-    global.global_set_initializer_rvalue(value);
+    if output.is_some() {
+        block.end_with_return(None, ret);
+    }
+    else {
+        block.end_with_void_return(None);
+    }
 
-    let name = NO_ALLOC_SHIM_IS_UNSTABLE.to_string();
-    let global = context.new_global(None, GlobalKind::Exported, i8, name);
-    let value = context.new_rvalue_from_int(i8, 0);
-    global.global_set_initializer_rvalue(value);
+    // TODO(@Commeownist): Check if we need to emit some extra debugging info in certain circumstances
+    // as described in https://github.com/rust-lang/rust/commit/77a96ed5646f7c3ee8897693decc4626fe380643
 }
diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs
index 905fdac92e9..f3a9ca77a67 100644
--- a/compiler/rustc_codegen_gcc/src/asm.rs
+++ b/compiler/rustc_codegen_gcc/src/asm.rs
@@ -452,10 +452,6 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                         }
 
                         InlineAsmOperandRef::Const { ref string } => {
-                            // Const operands get injected directly into the template
-                            if att_dialect {
-                                template_str.push('$');
-                            }
                             template_str.push_str(string);
                         }
                     }
diff --git a/compiler/rustc_codegen_gcc/src/attributes.rs b/compiler/rustc_codegen_gcc/src/attributes.rs
index eb0cce19b85..971e019a4f6 100644
--- a/compiler/rustc_codegen_gcc/src/attributes.rs
+++ b/compiler/rustc_codegen_gcc/src/attributes.rs
@@ -4,72 +4,13 @@ use gccjit::Function;
 use rustc_attr::InstructionSetAttr;
 #[cfg(feature="master")]
 use rustc_attr::InlineAttr;
-use rustc_codegen_ssa::target_features::tied_target_features;
-use rustc_data_structures::fx::FxHashMap;
 use rustc_middle::ty;
 #[cfg(feature="master")]
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
-use rustc_session::Session;
 use rustc_span::symbol::sym;
-use smallvec::{smallvec, SmallVec};
 
 use crate::{context::CodegenCx, errors::TiedTargetFeatures};
-
-// Given a map from target_features to whether they are enabled or disabled,
-// ensure only valid combinations are allowed.
-pub fn check_tied_features(sess: &Session, features: &FxHashMap<&str, bool>) -> Option<&'static [&'static str]> {
-    for tied in tied_target_features(sess) {
-        // Tied features must be set to the same value, or not set at all
-        let mut tied_iter = tied.iter();
-        let enabled = features.get(tied_iter.next().unwrap());
-        if tied_iter.any(|feature| enabled != features.get(feature)) {
-            return Some(tied);
-        }
-    }
-    None
-}
-
-// TODO(antoyo): maybe move to a new module gcc_util.
-// To find a list of GCC's names, check https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
-fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
-    let arch = if sess.target.arch == "x86_64" { "x86" } else { &*sess.target.arch };
-    match (arch, s) {
-        ("x86", "sse4.2") => smallvec!["sse4.2", "crc32"],
-        ("x86", "pclmulqdq") => smallvec!["pclmul"],
-        ("x86", "rdrand") => smallvec!["rdrnd"],
-        ("x86", "bmi1") => smallvec!["bmi"],
-        ("x86", "cmpxchg16b") => smallvec!["cx16"],
-        ("x86", "avx512vaes") => smallvec!["vaes"],
-        ("x86", "avx512gfni") => smallvec!["gfni"],
-        ("x86", "avx512vpclmulqdq") => smallvec!["vpclmulqdq"],
-        // NOTE: seems like GCC requires 'avx512bw' for 'avx512vbmi2'.
-        ("x86", "avx512vbmi2") => smallvec!["avx512vbmi2", "avx512bw"],
-        // NOTE: seems like GCC requires 'avx512bw' for 'avx512bitalg'.
-        ("x86", "avx512bitalg") => smallvec!["avx512bitalg", "avx512bw"],
-        ("aarch64", "rcpc2") => smallvec!["rcpc-immo"],
-        ("aarch64", "dpb") => smallvec!["ccpp"],
-        ("aarch64", "dpb2") => smallvec!["ccdp"],
-        ("aarch64", "frintts") => smallvec!["fptoint"],
-        ("aarch64", "fcma") => smallvec!["complxnum"],
-        ("aarch64", "pmuv3") => smallvec!["perfmon"],
-        ("aarch64", "paca") => smallvec!["pauth"],
-        ("aarch64", "pacg") => smallvec!["pauth"],
-        // Rust ties fp and neon together. In LLVM neon implicitly enables fp,
-        // but we manually enable neon when a feature only implicitly enables fp
-        ("aarch64", "f32mm") => smallvec!["f32mm", "neon"],
-        ("aarch64", "f64mm") => smallvec!["f64mm", "neon"],
-        ("aarch64", "fhm") => smallvec!["fp16fml", "neon"],
-        ("aarch64", "fp16") => smallvec!["fullfp16", "neon"],
-        ("aarch64", "jsconv") => smallvec!["jsconv", "neon"],
-        ("aarch64", "sve") => smallvec!["sve", "neon"],
-        ("aarch64", "sve2") => smallvec!["sve2", "neon"],
-        ("aarch64", "sve2-aes") => smallvec!["sve2-aes", "neon"],
-        ("aarch64", "sve2-sm4") => smallvec!["sve2-sm4", "neon"],
-        ("aarch64", "sve2-sha3") => smallvec!["sve2-sha3", "neon"],
-        ("aarch64", "sve2-bitperm") => smallvec!["sve2-bitperm", "neon"],
-        (_, s) => smallvec![s],
-    }
-}
+use crate::gcc_util::{check_tied_features, to_gcc_features};
 
 /// Get GCC attribute for the provided inline heuristic.
 #[cfg(feature="master")]
@@ -114,6 +55,19 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
         if let Some(attr) = inline_attr(cx, inline) {
             func.add_attribute(attr);
         }
+
+        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
+            func.add_attribute(FnAttribute::Cold);
+        }
+        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_RETURNS_TWICE) {
+            func.add_attribute(FnAttribute::ReturnsTwice);
+        }
+        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_PURE) {
+            func.add_attribute(FnAttribute::Pure);
+        }
+        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_CONST) {
+            func.add_attribute(FnAttribute::Const);
+        }
     }
 
     let function_features =
@@ -140,11 +94,33 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
         }))
         .collect::<Vec<_>>();
 
-    // TODO(antoyo): check if we really need global backend features. (Maybe they could be applied
-    // globally?)
+    // TODO(antoyo): cg_llvm adds global features to each function so that LTO keep them.
+    // Check if GCC requires the same.
     let mut global_features = cx.tcx.global_backend_features(()).iter().map(|s| s.as_str());
     function_features.extend(&mut global_features);
-    let target_features = function_features.join(",");
+    let target_features = function_features
+        .iter()
+        .filter_map(|feature| {
+            // FIXME(antoyo): for some reasons, disabling SSE results in the following error when
+            // compiling Rust for Linux:
+            // SSE register return with SSE disabled
+            // TODO(antoyo): support soft-float and retpoline-external-thunk.
+            if feature.contains("soft-float") || feature.contains("retpoline-external-thunk") || *feature == "-sse" {
+                return None;
+            }
+
+            if feature.starts_with('-') {
+                Some(format!("no{}", feature))
+            }
+            else if feature.starts_with('+') {
+                Some(feature[1..].to_string())
+            }
+            else {
+                Some(feature.to_string())
+            }
+        })
+        .collect::<Vec<_>>()
+        .join(",");
     if !target_features.is_empty() {
         #[cfg(feature="master")]
         func.add_attribute(FnAttribute::Target(&target_features));
diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs
new file mode 100644
index 00000000000..529454b119e
--- /dev/null
+++ b/compiler/rustc_codegen_gcc/src/back/lto.rs
@@ -0,0 +1,341 @@
+/// GCC requires to use the same toolchain for the whole compilation when doing LTO.
+/// So, we need the same version/commit of the linker (gcc) and lto front-end binaries (lto1,
+/// lto-wrapper, liblto_plugin.so).
+
+// FIXME(antoyo): the executables compiled with LTO are bigger than those compiled without LTO.
+// Since it is the opposite for cg_llvm, check if this is normal.
+//
+// Maybe we embed the bitcode in the final binary?
+// It doesn't look like we try to generate fat objects for the final binary.
+// Check if the way we combine the object files make it keep the LTO sections on the final link.
+// Maybe that's because the combined object files contain the IR (true) and the final link
+// does not remove it?
+//
+// TODO(antoyo): for performance, check which optimizations the C++ frontend enables.
+//
+// Fix these warnings:
+// /usr/bin/ld: warning: type of symbol `_RNvNvNvNtCs5JWOrf9uCus_5rayon11thread_pool19WORKER_THREAD_STATE7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
+// /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
+// /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization
+
+use std::ffi::CString;
+use std::fs::{self, File};
+use std::path::{Path, PathBuf};
+
+use gccjit::OutputKind;
+use object::read::archive::ArchiveFile;
+use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule};
+use rustc_codegen_ssa::back::symbol_export;
+use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
+use rustc_codegen_ssa::traits::*;
+use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
+use rustc_data_structures::memmap::Mmap;
+use rustc_errors::{FatalError, Handler};
+use rustc_hir::def_id::LOCAL_CRATE;
+use rustc_middle::dep_graph::WorkProduct;
+use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel};
+use rustc_session::config::{CrateType, Lto};
+use tempfile::{TempDir, tempdir};
+
+use crate::back::write::save_temp_bitcode;
+use crate::errors::{
+    DynamicLinkingWithLTO, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib,
+};
+use crate::{GccCodegenBackend, GccContext, to_gcc_opt_level};
+
+/// We keep track of the computed LTO cache keys from the previous
+/// session to determine which CGUs we can reuse.
+//pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin";
+
+pub fn crate_type_allows_lto(crate_type: CrateType) -> bool {
+    match crate_type {
+        CrateType::Executable | CrateType::Dylib | CrateType::Staticlib | CrateType::Cdylib => true,
+        CrateType::Rlib | CrateType::ProcMacro => false,
+    }
+}
+
+struct LtoData {
+    // TODO(antoyo): use symbols_below_threshold.
+    //symbols_below_threshold: Vec<CString>,
+    upstream_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
+    tmp_path: TempDir,
+}
+
+fn prepare_lto(cgcx: &CodegenContext<GccCodegenBackend>, diag_handler: &Handler) -> Result<LtoData, FatalError> {
+    let export_threshold = match cgcx.lto {
+        // We're just doing LTO for our one crate
+        Lto::ThinLocal => SymbolExportLevel::Rust,
+
+        // We're doing LTO for the entire crate graph
+        Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types),
+
+        Lto::No => panic!("didn't request LTO but we're doing LTO"),
+    };
+
+    let tmp_path =
+        match tempdir() {
+            Ok(tmp_path) => tmp_path,
+            Err(error) => {
+                eprintln!("Cannot create temporary directory: {}", error);
+                return Err(FatalError);
+            },
+        };
+
+    let symbol_filter = &|&(ref name, info): &(String, SymbolExportInfo)| {
+        if info.level.is_below_threshold(export_threshold) || info.used {
+            Some(CString::new(name.as_str()).unwrap())
+        } else {
+            None
+        }
+    };
+    let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
+    let mut symbols_below_threshold = {
+        let _timer = cgcx.prof.generic_activity("GCC_lto_generate_symbols_below_threshold");
+        exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>()
+    };
+    info!("{} symbols to preserve in this crate", symbols_below_threshold.len());
+
+    // If we're performing LTO for the entire crate graph, then for each of our
+    // upstream dependencies, find the corresponding rlib and load the bitcode
+    // from the archive.
+    //
+    // We save off all the bytecode and GCC module file path for later processing
+    // with either fat or thin LTO
+    let mut upstream_modules = Vec::new();
+    if cgcx.lto != Lto::ThinLocal {
+        // Make sure we actually can run LTO
+        for crate_type in cgcx.crate_types.iter() {
+            if !crate_type_allows_lto(*crate_type) {
+                diag_handler.emit_err(LtoDisallowed);
+                return Err(FatalError);
+            } else if *crate_type == CrateType::Dylib {
+                if !cgcx.opts.unstable_opts.dylib_lto {
+                    diag_handler.emit_err(LtoDylib);
+                    return Err(FatalError);
+                }
+            }
+        }
+
+        if cgcx.opts.cg.prefer_dynamic && !cgcx.opts.unstable_opts.dylib_lto {
+            diag_handler.emit_err(DynamicLinkingWithLTO);
+            return Err(FatalError);
+        }
+
+        for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
+            let exported_symbols =
+                cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
+            {
+                let _timer =
+                    cgcx.prof.generic_activity("GCC_lto_generate_symbols_below_threshold");
+                symbols_below_threshold
+                    .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter));
+            }
+
+            let archive_data = unsafe {
+                Mmap::map(File::open(&path).expect("couldn't open rlib"))
+                    .expect("couldn't map rlib")
+            };
+            let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib");
+            let obj_files = archive
+                .members()
+                .filter_map(|child| {
+                    child.ok().and_then(|c| {
+                        std::str::from_utf8(c.name()).ok().map(|name| (name.trim(), c))
+                    })
+                })
+                .filter(|&(name, _)| looks_like_rust_object_file(name));
+            for (name, child) in obj_files {
+                info!("adding bitcode from {}", name);
+                let path = tmp_path.path().join(name);
+                match save_as_file(child.data(&*archive_data).expect("corrupt rlib"), &path) {
+                    Ok(()) => {
+                        let buffer = ModuleBuffer::new(path);
+                        let module = SerializedModule::Local(buffer);
+                        upstream_modules.push((module, CString::new(name).unwrap()));
+                    }
+                    Err(e) => {
+                        diag_handler.emit_err(e);
+                        return Err(FatalError);
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(LtoData {
+        //symbols_below_threshold,
+        upstream_modules,
+        tmp_path,
+    })
+}
+
+fn save_as_file(obj: &[u8], path: &Path) -> Result<(), LtoBitcodeFromRlib> {
+    fs::write(path, obj)
+        .map_err(|error| LtoBitcodeFromRlib {
+            gcc_err: format!("write object file to temp dir: {}", error)
+        })
+}
+
+/// Performs fat LTO by merging all modules into a single one and returning it
+/// for further optimization.
+pub(crate) fn run_fat(
+    cgcx: &CodegenContext<GccCodegenBackend>,
+    modules: Vec<FatLtoInput<GccCodegenBackend>>,
+    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
+) -> Result<LtoModuleCodegen<GccCodegenBackend>, FatalError> {
+    let diag_handler = cgcx.create_diag_handler();
+    let lto_data = prepare_lto(cgcx, &diag_handler)?;
+    /*let symbols_below_threshold =
+        lto_data.symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();*/
+    fat_lto(cgcx, &diag_handler, modules, cached_modules, lto_data.upstream_modules, lto_data.tmp_path,
+        //&symbols_below_threshold,
+    )
+}
+
+fn fat_lto(cgcx: &CodegenContext<GccCodegenBackend>, _diag_handler: &Handler, modules: Vec<FatLtoInput<GccCodegenBackend>>, cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, tmp_path: TempDir,
+    //symbols_below_threshold: &[*const libc::c_char],
+) -> Result<LtoModuleCodegen<GccCodegenBackend>, FatalError> {
+    let _timer = cgcx.prof.generic_activity("GCC_fat_lto_build_monolithic_module");
+    info!("going for a fat lto");
+
+    // Sort out all our lists of incoming modules into two lists.
+    //
+    // * `serialized_modules` (also and argument to this function) contains all
+    //   modules that are serialized in-memory.
+    // * `in_memory` contains modules which are already parsed and in-memory,
+    //   such as from multi-CGU builds.
+    //
+    // All of `cached_modules` (cached from previous incremental builds) can
+    // immediately go onto the `serialized_modules` modules list and then we can
+    // split the `modules` array into these two lists.
+    let mut in_memory = Vec::new();
+    serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
+        info!("pushing cached module {:?}", wp.cgu_name);
+        (buffer, CString::new(wp.cgu_name).unwrap())
+    }));
+    for module in modules {
+        match module {
+            FatLtoInput::InMemory(m) => in_memory.push(m),
+            FatLtoInput::Serialized { name, buffer } => {
+                info!("pushing serialized module {:?}", name);
+                let buffer = SerializedModule::Local(buffer);
+                serialized_modules.push((buffer, CString::new(name).unwrap()));
+            }
+        }
+    }
+
+    // Find the "costliest" module and merge everything into that codegen unit.
+    // All the other modules will be serialized and reparsed into the new
+    // context, so this hopefully avoids serializing and parsing the largest
+    // codegen unit.
+    //
+    // Additionally use a regular module as the base here to ensure that various
+    // file copy operations in the backend work correctly. The only other kind
+    // of module here should be an allocator one, and if your crate is smaller
+    // than the allocator module then the size doesn't really matter anyway.
+    let costliest_module = in_memory
+        .iter()
+        .enumerate()
+        .filter(|&(_, module)| module.kind == ModuleKind::Regular)
+        .map(|(i, _module)| {
+            //let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
+            // TODO(antoyo): compute the cost of a module if GCC allows this.
+            (0, i)
+        })
+        .max();
+
+    // If we found a costliest module, we're good to go. Otherwise all our
+    // inputs were serialized which could happen in the case, for example, that
+    // all our inputs were incrementally reread from the cache and we're just
+    // re-executing the LTO passes. If that's the case deserialize the first
+    // module and create a linker with it.
+    let mut module: ModuleCodegen<GccContext> = match costliest_module {
+        Some((_cost, i)) => in_memory.remove(i),
+        None => {
+            unimplemented!("Incremental");
+            /*assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
+            let (buffer, name) = serialized_modules.remove(0);
+            info!("no in-memory regular modules to choose from, parsing {:?}", name);
+            ModuleCodegen {
+                module_llvm: GccContext::parse(cgcx, &name, buffer.data(), diag_handler)?,
+                name: name.into_string().unwrap(),
+                kind: ModuleKind::Regular,
+            }*/
+        }
+    };
+    let mut serialized_bitcode = Vec::new();
+    {
+        info!("using {:?} as a base module", module.name);
+
+        // We cannot load and merge GCC contexts in memory like cg_llvm is doing.
+        // Instead, we combine the object files into a single object file.
+        for module in in_memory {
+            let path = tmp_path.path().to_path_buf().join(&module.name);
+            let path = path.to_str().expect("path");
+            let context = &module.module_llvm.context;
+            let config = cgcx.config(module.kind);
+            // NOTE: we need to set the optimization level here in order for LTO to do its job.
+            context.set_optimization_level(to_gcc_opt_level(config.opt_level));
+            context.add_command_line_option("-flto=auto");
+            context.add_command_line_option("-flto-partition=one");
+            context.compile_to_file(OutputKind::ObjectFile, path);
+            let buffer = ModuleBuffer::new(PathBuf::from(path));
+            let llmod_id = CString::new(&module.name[..]).unwrap();
+            serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
+        }
+        // Sort the modules to ensure we produce deterministic results.
+        serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1));
+
+        // We add the object files and save in should_combine_object_files that we should combine
+        // them into a single object file when compiling later.
+        for (bc_decoded, name) in serialized_modules {
+            let _timer = cgcx
+                .prof
+                .generic_activity_with_arg_recorder("GCC_fat_lto_link_module", |recorder| {
+                    recorder.record_arg(format!("{:?}", name))
+                });
+            info!("linking {:?}", name);
+            match bc_decoded {
+                SerializedModule::Local(ref module_buffer) => {
+                    module.module_llvm.should_combine_object_files = true;
+                    module.module_llvm.context.add_driver_option(module_buffer.0.to_str().expect("path"));
+                },
+                SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
+                SerializedModule::FromUncompressedFile(_) => unimplemented!("from uncompressed file"),
+            }
+            serialized_bitcode.push(bc_decoded);
+        }
+        save_temp_bitcode(cgcx, &module, "lto.input");
+
+        // Internalize everything below threshold to help strip out more modules and such.
+        /*unsafe {
+            let ptr = symbols_below_threshold.as_ptr();
+            llvm::LLVMRustRunRestrictionPass(
+                llmod,
+                ptr as *const *const libc::c_char,
+                symbols_below_threshold.len() as libc::size_t,
+            );*/
+            save_temp_bitcode(cgcx, &module, "lto.after-restriction");
+        //}
+    }
+
+    // NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead
+    // of now.
+    module.module_llvm.temp_dir = Some(tmp_path);
+
+    Ok(LtoModuleCodegen::Fat { module, _serialized_bitcode: serialized_bitcode })
+}
+
+pub struct ModuleBuffer(PathBuf);
+
+impl ModuleBuffer {
+    pub fn new(path: PathBuf) -> ModuleBuffer {
+        ModuleBuffer(path)
+    }
+}
+
+impl ModuleBufferMethods for ModuleBuffer {
+    fn data(&self) -> &[u8] {
+        unimplemented!("data not needed for GCC codegen");
+    }
+}
diff --git a/compiler/rustc_codegen_gcc/src/back/mod.rs b/compiler/rustc_codegen_gcc/src/back/mod.rs
index d692799d764..10187eab0d7 100644
--- a/compiler/rustc_codegen_gcc/src/back/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/back/mod.rs
@@ -1 +1,2 @@
+pub mod lto;
 pub mod write;
diff --git a/compiler/rustc_codegen_gcc/src/back/write.rs b/compiler/rustc_codegen_gcc/src/back/write.rs
index 5f54ac4ebc6..04772d7707a 100644
--- a/compiler/rustc_codegen_gcc/src/back/write.rs
+++ b/compiler/rustc_codegen_gcc/src/back/write.rs
@@ -2,27 +2,71 @@ use std::{env, fs};
 
 use gccjit::OutputKind;
 use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
-use rustc_codegen_ssa::back::write::{CodegenContext, EmitObj, ModuleConfig};
+use rustc_codegen_ssa::back::link::ensure_removed;
+use rustc_codegen_ssa::back::write::{BitcodeSection, CodegenContext, EmitObj, ModuleConfig};
 use rustc_errors::Handler;
+use rustc_fs_util::link_or_copy;
 use rustc_session::config::OutputType;
 use rustc_span::fatal_error::FatalError;
 use rustc_target::spec::SplitDebuginfo;
 
 use crate::{GccCodegenBackend, GccContext};
+use crate::errors::CopyBitcode;
 
-pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_handler: &Handler, module: ModuleCodegen<GccContext>, config: &ModuleConfig) -> Result<CompiledModule, FatalError> {
-    let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_codegen", &*module.name);
+pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, diag_handler: &Handler, module: ModuleCodegen<GccContext>, config: &ModuleConfig) -> Result<CompiledModule, FatalError> {
+    let _timer = cgcx.prof.generic_activity_with_arg("GCC_module_codegen", &*module.name);
     {
         let context = &module.module_llvm.context;
 
         let module_name = module.name.clone();
+
+        let should_combine_object_files = module.module_llvm.should_combine_object_files;
+
         let module_name = Some(&module_name[..]);
 
-        let _bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
+        // NOTE: Only generate object files with GIMPLE when this environment variable is set for
+        // now because this requires a particular setup (same gcc/lto1/lto-wrapper commit as libgccjit).
+        let fat_lto = env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1");
+
+        let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
         let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
 
-        if config.bitcode_needed() {
+        if config.bitcode_needed() && fat_lto {
+            let _timer = cgcx
+                .prof
+                .generic_activity_with_arg("GCC_module_codegen_make_bitcode", &*module.name);
+
             // TODO(antoyo)
+            /*if let Some(bitcode_filename) = bc_out.file_name() {
+                cgcx.prof.artifact_size(
+                    "llvm_bitcode",
+                    bitcode_filename.to_string_lossy(),
+                    data.len() as u64,
+                );
+            }*/
+
+            if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
+                let _timer = cgcx
+                    .prof
+                    .generic_activity_with_arg("GCC_module_codegen_emit_bitcode", &*module.name);
+                context.add_command_line_option("-flto=auto");
+                context.add_command_line_option("-flto-partition=one");
+                context.compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str"));
+            }
+
+            if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
+                let _timer = cgcx
+                    .prof
+                    .generic_activity_with_arg("GCC_module_codegen_embed_bitcode", &*module.name);
+                // TODO(antoyo): maybe we should call embed_bitcode to have the proper iOS fixes?
+                //embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
+
+                context.add_command_line_option("-flto=auto");
+                context.add_command_line_option("-flto-partition=one");
+                context.add_command_line_option("-ffat-lto-objects");
+                // TODO(antoyo): Send -plugin/usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/liblto_plugin.so to linker (this should be done when specifying the appropriate rustc cli argument).
+                context.compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str"));
+            }
         }
 
         if config.emit_ir {
@@ -32,7 +76,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han
         if config.emit_asm {
             let _timer = cgcx
                 .prof
-                .generic_activity_with_arg("LLVM_module_codegen_emit_asm", &*module.name);
+                .generic_activity_with_arg("GCC_module_codegen_emit_asm", &*module.name);
             let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
             context.compile_to_file(OutputKind::Assembler, path.to_str().expect("path to str"));
         }
@@ -41,7 +85,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han
             EmitObj::ObjectCode(_) => {
                 let _timer = cgcx
                     .prof
-                    .generic_activity_with_arg("LLVM_module_codegen_emit_obj", &*module.name);
+                    .generic_activity_with_arg("GCC_module_codegen_emit_obj", &*module.name);
                 if env::var("CG_GCCJIT_DUMP_MODULE_NAMES").as_deref() == Ok("1") {
                     println!("Module {}", module.name);
                 }
@@ -60,11 +104,36 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han
                     context.set_debug_info(true);
                     context.dump_to_file(path, true);
                 }
-                context.compile_to_file(OutputKind::ObjectFile, obj_out.to_str().expect("path to str"));
+                if should_combine_object_files && fat_lto {
+                    context.add_command_line_option("-flto=auto");
+                    context.add_command_line_option("-flto-partition=one");
+
+                    context.add_driver_option("-Wl,-r");
+                    // NOTE: we need -nostdlib, otherwise, we get the following error:
+                    // /usr/bin/ld: cannot find -lgcc_s: No such file or directory
+                    context.add_driver_option("-nostdlib");
+                    // NOTE: without -fuse-linker-plugin, we get the following error:
+                    // lto1: internal compiler error: decompressed stream: Destination buffer is too small
+                    context.add_driver_option("-fuse-linker-plugin");
+
+                    // NOTE: this doesn't actually generate an executable. With the above flags, it combines the .o files together in another .o.
+                    context.compile_to_file(OutputKind::Executable, obj_out.to_str().expect("path to str"));
+                }
+                else {
+                    context.compile_to_file(OutputKind::ObjectFile, obj_out.to_str().expect("path to str"));
+                }
             }
 
             EmitObj::Bitcode => {
-                // TODO(antoyo)
+                debug!("copying bitcode {:?} to obj {:?}", bc_out, obj_out);
+                if let Err(err) = link_or_copy(&bc_out, &obj_out) {
+                    diag_handler.emit_err(CopyBitcode { err });
+                }
+
+                if !config.emit_bc {
+                    debug!("removing_bitcode {:?}", bc_out);
+                    ensure_removed(diag_handler, &bc_out);
+                }
             }
 
             EmitObj::None => {}
@@ -82,3 +151,18 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han
 pub(crate) fn link(_cgcx: &CodegenContext<GccCodegenBackend>, _diag_handler: &Handler, mut _modules: Vec<ModuleCodegen<GccContext>>) -> Result<ModuleCodegen<GccContext>, FatalError> {
     unimplemented!();
 }
+
+pub(crate) fn save_temp_bitcode(cgcx: &CodegenContext<GccCodegenBackend>, _module: &ModuleCodegen<GccContext>, _name: &str) {
+    if !cgcx.save_temps {
+        return;
+    }
+    unimplemented!();
+    /*unsafe {
+        let ext = format!("{}.bc", name);
+        let cgu = Some(&module.name[..]);
+        let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
+        let cstr = path_to_c_string(&path);
+        let llmod = module.module_llvm.llmod();
+        llvm::LLVMWriteBitcodeToFile(llmod, cstr.as_ptr());
+    }*/
+}
diff --git a/compiler/rustc_codegen_gcc/src/base.rs b/compiler/rustc_codegen_gcc/src/base.rs
index 9e614ca4ace..61da38f4b0d 100644
--- a/compiler/rustc_codegen_gcc/src/base.rs
+++ b/compiler/rustc_codegen_gcc/src/base.rs
@@ -1,3 +1,4 @@
+use std::collections::HashSet;
 use std::env;
 use std::time::Instant;
 
@@ -18,6 +19,7 @@ use rustc_codegen_ssa::traits::DebugInfoMethods;
 use rustc_session::config::DebugInfo;
 use rustc_span::Symbol;
 
+use crate::{LockedTargetInfo, gcc_util};
 use crate::GccContext;
 use crate::builder::Builder;
 use crate::context::CodegenCx;
@@ -50,6 +52,7 @@ pub fn global_linkage_to_gcc(linkage: Linkage) -> GlobalKind {
 pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType {
     match linkage {
         Linkage::External => FunctionType::Exported,
+        // TODO(antoyo): set the attribute externally_visible.
         Linkage::AvailableExternally => FunctionType::Extern,
         Linkage::LinkOnceAny => unimplemented!(),
         Linkage::LinkOnceODR => unimplemented!(),
@@ -63,7 +66,7 @@ pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType {
     }
 }
 
-pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_integers: bool) -> (ModuleCodegen<GccContext>, u64) {
+pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: LockedTargetInfo) -> (ModuleCodegen<GccContext>, u64) {
     let prof_timer = tcx.prof.generic_activity("codegen_module");
     let start_time = Instant::now();
 
@@ -71,7 +74,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
     let (module, _) = tcx.dep_graph.with_task(
         dep_node,
         tcx,
-        (cgu_name, supports_128bit_integers),
+        (cgu_name, target_info),
         module_codegen,
         Some(dep_graph::hash_result),
     );
@@ -82,38 +85,28 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
     // the time we needed for codegenning it.
     let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;
 
-    fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, supports_128bit_integers): (Symbol, bool)) -> ModuleCodegen<GccContext> {
+    fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, LockedTargetInfo)) -> ModuleCodegen<GccContext> {
         let cgu = tcx.codegen_unit(cgu_name);
         // Instantiate monomorphizations without filling out definitions yet...
-        //let llvm_module = ModuleLlvm::new(tcx, &cgu_name.as_str());
         let context = Context::default();
 
         context.add_command_line_option("-fexceptions");
         context.add_driver_option("-fexceptions");
 
+        let disabled_features: HashSet<_> = tcx.sess.opts.cg.target_feature.split(',')
+            .filter(|feature| feature.starts_with('-'))
+            .map(|string| &string[1..])
+            .collect();
+
         // TODO(antoyo): only set on x86 platforms.
         context.add_command_line_option("-masm=intel");
-        // TODO(antoyo): only add the following cli argument if the feature is supported.
-        context.add_command_line_option("-msse2");
-        context.add_command_line_option("-mavx2");
-        // FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
-        // Only add if the CPU supports it.
-        context.add_command_line_option("-msha");
-        context.add_command_line_option("-mpclmul");
-        context.add_command_line_option("-mfma");
-        context.add_command_line_option("-mfma4");
-        context.add_command_line_option("-m64");
-        context.add_command_line_option("-mbmi");
-        context.add_command_line_option("-mgfni");
-        //context.add_command_line_option("-mavxvnni"); // The CI doesn't support this option.
-        context.add_command_line_option("-mf16c");
-        context.add_command_line_option("-maes");
-        context.add_command_line_option("-mxsavec");
-        context.add_command_line_option("-mbmi2");
-        context.add_command_line_option("-mrtm");
-        context.add_command_line_option("-mvaes");
-        context.add_command_line_option("-mvpclmulqdq");
-        context.add_command_line_option("-mavx");
+
+        if !disabled_features.contains("avx") {
+            // NOTE: we always enable AVX because the equivalent of llvm.x86.sse2.cmp.pd in GCC for
+            // SSE2 is multiple builtins, so we use the AVX __builtin_ia32_cmppd instead.
+            // FIXME(antoyo): use the proper builtins for llvm.x86.sse2.cmp.pd and similar.
+            context.add_command_line_option("-mavx");
+        }
 
         for arg in &tcx.sess.opts.cg.llvm_args {
             context.add_command_line_option(arg);
@@ -127,6 +120,16 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
         // NOTE: Rust relies on LLVM doing wrapping on overflow.
         context.add_command_line_option("-fwrapv");
 
+        if tcx.sess.opts.cg.relocation_model == Some(rustc_target::spec::RelocModel::Static) {
+            context.add_command_line_option("-mcmodel=kernel");
+            context.add_command_line_option("-fno-pie");
+        }
+
+        let target_cpu = gcc_util::target_cpu(tcx.sess);
+        if target_cpu != "generic" {
+            context.add_command_line_option(&format!("-march={}", target_cpu));
+        }
+
         if tcx.sess.opts.unstable_opts.function_sections.unwrap_or(tcx.sess.target.function_sections) {
             context.add_command_line_option("-ffunction-sections");
             context.add_command_line_option("-fdata-sections");
@@ -135,8 +138,14 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
         if env::var("CG_GCCJIT_DUMP_RTL").as_deref() == Ok("1") {
             context.add_command_line_option("-fdump-rtl-vregs");
         }
+        if env::var("CG_GCCJIT_DUMP_RTL_ALL").as_deref() == Ok("1") {
+            context.add_command_line_option("-fdump-rtl-all");
+        }
         if env::var("CG_GCCJIT_DUMP_TREE_ALL").as_deref() == Ok("1") {
-            context.add_command_line_option("-fdump-tree-all");
+            context.add_command_line_option("-fdump-tree-all-eh");
+        }
+        if env::var("CG_GCCJIT_DUMP_IPA_ALL").as_deref() == Ok("1") {
+            context.add_command_line_option("-fdump-ipa-all-eh");
         }
         if env::var("CG_GCCJIT_DUMP_CODE").as_deref() == Ok("1") {
             context.set_dump_code_on_compile(true);
@@ -152,11 +161,15 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
             context.set_keep_intermediates(true);
         }
 
+        if env::var("CG_GCCJIT_VERBOSE").as_deref() == Ok("1") {
+            context.add_driver_option("-v");
+        }
+
         // NOTE: The codegen generates unrechable blocks.
         context.set_allow_unreachable_blocks(true);
 
         {
-            let cx = CodegenCx::new(&context, cgu, tcx, supports_128bit_integers);
+            let cx = CodegenCx::new(&context, cgu, tcx, target_info.supports_128bit_int());
 
             let mono_items = cgu.items_in_deterministic_order(tcx);
             for &(mono_item, data) in &mono_items {
@@ -181,7 +194,9 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i
         ModuleCodegen {
             name: cgu_name.to_string(),
             module_llvm: GccContext {
-                context
+                context,
+                should_combine_object_files: false,
+                temp_dir: None,
             },
             kind: ModuleKind::Regular,
         }
diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
index ecc293aee23..b7841808934 100644
--- a/compiler/rustc_codegen_gcc/src/builder.rs
+++ b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -247,16 +247,9 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     }
 
     fn check_store(&mut self, val: RValue<'gcc>, ptr: RValue<'gcc>) -> RValue<'gcc> {
-        let dest_ptr_ty = self.cx.val_ty(ptr).make_pointer(); // TODO(antoyo): make sure make_pointer() is okay here.
         let stored_ty = self.cx.val_ty(val);
         let stored_ptr_ty = self.cx.type_ptr_to(stored_ty);
-
-        if dest_ptr_ty == stored_ptr_ty {
-            ptr
-        }
-        else {
-            self.bitcast(ptr, stored_ptr_ty)
-        }
+        self.bitcast(ptr, stored_ptr_ty)
     }
 
     pub fn current_func(&self) -> Function<'gcc> {
@@ -500,7 +493,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     #[cfg(not(feature="master"))]
-    fn invoke(&mut self, typ: Type<'gcc>, fn_attrs: &CodegenFnAttrs, fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> {
+    fn invoke(&mut self, typ: Type<'gcc>, fn_attrs: Option<&CodegenFnAttrs>, fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> {
         let call_site = self.call(typ, fn_attrs, None, func, args, None);
         let condition = self.context.new_rvalue_from_int(self.bool_type, 1);
         self.llbb().end_with_conditional(None, condition, then, catch);
@@ -663,7 +656,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn unchecked_sadd(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
-        a + b
+        self.gcc_add(a, b)
     }
 
     fn unchecked_uadd(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
@@ -671,7 +664,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn unchecked_ssub(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
-        a - b
+        self.gcc_sub(a, b)
     }
 
     fn unchecked_usub(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
@@ -680,11 +673,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn unchecked_smul(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
-        a * b
+        self.gcc_mul(a, b)
     }
 
     fn unchecked_umul(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
-        a * b
+        self.gcc_mul(a, b)
     }
 
     fn fadd_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
@@ -916,7 +909,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
             .add_eval(None, self.context.new_call(None, atomic_store, &[ptr, value, ordering]));
     }
 
-    fn gep(&mut self, _typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
+    fn gep(&mut self, typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
+        // NOTE: due to opaque pointers now being used, we need to cast here.
+        let ptr = self.context.new_cast(None, ptr, typ.make_pointer());
         let ptr_type = ptr.get_type();
         let mut pointee_type = ptr.get_type();
         // NOTE: we cannot use array indexing here like in inbounds_gep because array indexing is
@@ -927,6 +922,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         // require dereferencing the pointer.
         for index in indices {
             pointee_type = pointee_type.get_pointee().expect("pointee type");
+            #[cfg(feature="master")]
+            let pointee_size = {
+                let size = self.cx.context.new_sizeof(pointee_type);
+                self.context.new_cast(None, size, index.get_type())
+            };
+            #[cfg(not(feature="master"))]
             let pointee_size = self.context.new_rvalue_from_int(index.get_type(), pointee_type.get_size() as i32);
             result = result + self.gcc_int_cast(*index * pointee_size, self.sizet_type);
         }
diff --git a/compiler/rustc_codegen_gcc/src/declare.rs b/compiler/rustc_codegen_gcc/src/declare.rs
index 493626c3cf5..e673d0af4c7 100644
--- a/compiler/rustc_codegen_gcc/src/declare.rs
+++ b/compiler/rustc_codegen_gcc/src/declare.rs
@@ -1,4 +1,6 @@
 use gccjit::{Function, FunctionType, GlobalKind, LValue, RValue, Type};
+#[cfg(feature="master")]
+use gccjit::{FnAttribute, ToRValue};
 use rustc_codegen_ssa::traits::BaseTypeMethods;
 use rustc_middle::ty::Ty;
 use rustc_span::Symbol;
@@ -114,6 +116,44 @@ fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*ll
                 .collect();
             let func = cx.context.new_function(None, cx.linkage.get(), return_type, &params, mangle_name(name), variadic);
             cx.functions.borrow_mut().insert(name.to_string(), func);
+
+            #[cfg(feature="master")]
+            if name == "rust_eh_personality" {
+                // NOTE: GCC will sometimes change the personality function set on a function from
+                // rust_eh_personality to __gcc_personality_v0 as an optimization.
+                // As such, we need to create a weak alias from __gcc_personality_v0 to
+                // rust_eh_personality in order to avoid a linker error.
+                // This needs to be weak in order to still allow using the standard
+                // __gcc_personality_v0 when the linking to it.
+                // Since aliases don't work (maybe because of a bug in LTO partitioning?), we
+                // create a wrapper function that calls rust_eh_personality.
+
+                let params: Vec<_> = param_types.into_iter().enumerate()
+                    .map(|(index, param)| cx.context.new_parameter(None, *param, &format!("param{}", index))) // TODO(antoyo): set name.
+                    .collect();
+                let gcc_func = cx.context.new_function(None, FunctionType::Exported, return_type, &params, "__gcc_personality_v0", variadic);
+
+                // We need a normal extern function for the crates that access rust_eh_personality
+                // without defining it, otherwise we'll get a compiler error.
+                //
+                // For the crate defining it, that needs to be a weak alias instead.
+                gcc_func.add_attribute(FnAttribute::Weak);
+
+                let block = gcc_func.new_block("start");
+                let mut args = vec![];
+                for param in &params {
+                    args.push(param.to_rvalue());
+                }
+                let call = cx.context.new_call(None, func, &args);
+                if return_type == cx.type_void() {
+                    block.add_eval(None, call);
+                    block.end_with_void_return(None);
+                }
+                else {
+                    block.end_with_return(None, call);
+                }
+            }
+
             func
         };
 
diff --git a/compiler/rustc_codegen_gcc/src/errors.rs b/compiler/rustc_codegen_gcc/src/errors.rs
index 693367192b1..4bf3b71f503 100644
--- a/compiler/rustc_codegen_gcc/src/errors.rs
+++ b/compiler/rustc_codegen_gcc/src/errors.rs
@@ -1,8 +1,36 @@
-use rustc_errors::{DiagnosticArgValue, IntoDiagnosticArg};
-use rustc_macros::Diagnostic;
+use rustc_errors::{
+    DiagnosticArgValue, DiagnosticBuilder, ErrorGuaranteed, Handler, IntoDiagnostic, IntoDiagnosticArg,
+};
+use rustc_macros::{Diagnostic, Subdiagnostic};
 use rustc_span::Span;
 use std::borrow::Cow;
 
+use crate::fluent_generated as fluent;
+
+#[derive(Diagnostic)]
+#[diag(codegen_gcc_unknown_ctarget_feature_prefix)]
+#[note]
+pub(crate) struct UnknownCTargetFeaturePrefix<'a> {
+    pub feature: &'a str,
+}
+
+#[derive(Diagnostic)]
+#[diag(codegen_gcc_unknown_ctarget_feature)]
+#[note]
+pub(crate) struct UnknownCTargetFeature<'a> {
+    pub feature: &'a str,
+    #[subdiagnostic]
+    pub rust_feature: PossibleFeature<'a>,
+}
+
+#[derive(Subdiagnostic)]
+pub(crate) enum PossibleFeature<'a> {
+    #[help(codegen_gcc_possible_feature)]
+    Some { rust_feature: &'a str },
+    #[help(codegen_gcc_consider_filing_feature_request)]
+    None,
+}
+
 struct ExitCode(Option<i32>);
 
 impl IntoDiagnosticArg for ExitCode {
@@ -40,3 +68,58 @@ pub(crate) struct TiedTargetFeatures {
     pub span: Span,
     pub features: String,
 }
+
+#[derive(Diagnostic)]
+#[diag(codegen_gcc_copy_bitcode)]
+pub(crate) struct CopyBitcode {
+    pub err: std::io::Error,
+}
+
+#[derive(Diagnostic)]
+#[diag(codegen_gcc_dynamic_linking_with_lto)]
+#[note]
+pub(crate) struct DynamicLinkingWithLTO;
+
+#[derive(Diagnostic)]
+#[diag(codegen_gcc_load_bitcode)]
+pub(crate) struct LoadBitcode {
+    name: String,
+}
+
+#[derive(Diagnostic)]
+#[diag(codegen_gcc_lto_disallowed)]
+pub(crate) struct LtoDisallowed;
+
+#[derive(Diagnostic)]
+#[diag(codegen_gcc_lto_dylib)]
+pub(crate) struct LtoDylib;
+
+#[derive(Diagnostic)]
+#[diag(codegen_gcc_lto_bitcode_from_rlib)]
+pub(crate) struct LtoBitcodeFromRlib {
+    pub gcc_err: String,
+}
+
+pub(crate) struct TargetFeatureDisableOrEnable<'a> {
+    pub features: &'a [&'a str],
+    pub span: Option<Span>,
+    pub missing_features: Option<MissingFeatures>,
+}
+
+#[derive(Subdiagnostic)]
+#[help(codegen_gcc_missing_features)]
+pub(crate) struct MissingFeatures;
+
+impl IntoDiagnostic<'_, ErrorGuaranteed> for TargetFeatureDisableOrEnable<'_> {
+    fn into_diagnostic(self, sess: &'_ Handler) -> DiagnosticBuilder<'_, ErrorGuaranteed> {
+        let mut diag = sess.struct_err(fluent::codegen_gcc_target_feature_disable_or_enable);
+        if let Some(span) = self.span {
+            diag.set_span(span);
+        };
+        if let Some(missing_features) = self.missing_features {
+            diag.subdiagnostic(missing_features);
+        }
+        diag.set_arg("features", self.features.join(", "));
+        diag
+    }
+}
diff --git a/compiler/rustc_codegen_gcc/src/gcc_util.rs b/compiler/rustc_codegen_gcc/src/gcc_util.rs
new file mode 100644
index 00000000000..0514c9988e0
--- /dev/null
+++ b/compiler/rustc_codegen_gcc/src/gcc_util.rs
@@ -0,0 +1,223 @@
+#[cfg(feature="master")]
+use gccjit::Context;
+use smallvec::{smallvec, SmallVec};
+
+use rustc_codegen_ssa::target_features::{
+    supported_target_features, tied_target_features, RUSTC_SPECIFIC_FEATURES,
+};
+use rustc_data_structures::fx::FxHashMap;
+use rustc_middle::bug;
+use rustc_session::Session;
+
+use crate::errors::{PossibleFeature, TargetFeatureDisableOrEnable, UnknownCTargetFeature, UnknownCTargetFeaturePrefix};
+
+/// The list of GCC features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`,
+/// `--target` and similar).
+pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<String> {
+    // Features that come earlier are overridden by conflicting features later in the string.
+    // Typically we'll want more explicit settings to override the implicit ones, so:
+    //
+    // * Features from -Ctarget-cpu=*; are overridden by [^1]
+    // * Features implied by --target; are overridden by
+    // * Features from -Ctarget-feature; are overridden by
+    // * function specific features.
+    //
+    // [^1]: target-cpu=native is handled here, other target-cpu values are handled implicitly
+    // through GCC march implementation.
+    //
+    // FIXME(nagisa): it isn't clear what's the best interaction between features implied by
+    // `-Ctarget-cpu` and `--target` are. On one hand, you'd expect CLI arguments to always
+    // override anything that's implicit, so e.g. when there's no `--target` flag, features implied
+    // the host target are overridden by `-Ctarget-cpu=*`. On the other hand, what about when both
+    // `--target` and `-Ctarget-cpu=*` are specified? Both then imply some target features and both
+    // flags are specified by the user on the CLI. It isn't as clear-cut which order of precedence
+    // should be taken in cases like these.
+    let mut features = vec![];
+
+    // Features implied by an implicit or explicit `--target`.
+    features.extend(
+        sess.target
+            .features
+            .split(',')
+            .filter(|v| !v.is_empty() && backend_feature_name(v).is_some())
+            .map(String::from),
+    );
+
+    // -Ctarget-features
+    let supported_features = supported_target_features(sess);
+    let mut featsmap = FxHashMap::default();
+    let feats = sess.opts.cg.target_feature
+        .split(',')
+        .filter_map(|s| {
+            let enable_disable = match s.chars().next() {
+                None => return None,
+                Some(c @ ('+' | '-')) => c,
+                Some(_) => {
+                    if diagnostics {
+                        sess.emit_warning(UnknownCTargetFeaturePrefix { feature: s });
+                    }
+                    return None;
+                }
+            };
+
+            let feature = backend_feature_name(s)?;
+            // Warn against use of GCC specific feature names on the CLI.
+            if diagnostics && !supported_features.iter().any(|&(v, _)| v == feature) {
+                let rust_feature = supported_features.iter().find_map(|&(rust_feature, _)| {
+                    let gcc_features = to_gcc_features(sess, rust_feature);
+                    if gcc_features.contains(&feature) && !gcc_features.contains(&rust_feature) {
+                        Some(rust_feature)
+                    } else {
+                        None
+                    }
+                });
+                let unknown_feature =
+                    if let Some(rust_feature) = rust_feature {
+                        UnknownCTargetFeature {
+                            feature,
+                            rust_feature: PossibleFeature::Some { rust_feature },
+                        }
+                    }
+                    else {
+                        UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None }
+                    };
+                sess.emit_warning(unknown_feature);
+            }
+
+            if diagnostics {
+                // FIXME(nagisa): figure out how to not allocate a full hashset here.
+                featsmap.insert(feature, enable_disable == '+');
+            }
+
+            // rustc-specific features do not get passed down to GCC…
+            if RUSTC_SPECIFIC_FEATURES.contains(&feature) {
+                return None;
+            }
+            // ... otherwise though we run through `to_gcc_features` when
+            // passing requests down to GCC. This means that all in-language
+            // features also work on the command line instead of having two
+            // different names when the GCC name and the Rust name differ.
+            Some(to_gcc_features(sess, feature)
+                .iter()
+                .flat_map(|feat| to_gcc_features(sess, feat).into_iter())
+                .map(|feature| {
+                    if enable_disable == '-' {
+                        format!("-{}", feature)
+                    }
+                    else {
+                        feature.to_string()
+                    }
+                })
+                .collect::<Vec<_>>(),
+            )
+        })
+        .flatten();
+    features.extend(feats);
+
+    if diagnostics {
+        if let Some(f) = check_tied_features(sess, &featsmap) {
+            sess.emit_err(TargetFeatureDisableOrEnable {
+                features: f,
+                span: None,
+                missing_features: None,
+            });
+        }
+    }
+
+    features
+}
+
+/// Returns a feature name for the given `+feature` or `-feature` string.
+///
+/// Only allows features that are backend specific (i.e. not [`RUSTC_SPECIFIC_FEATURES`].)
+fn backend_feature_name(s: &str) -> Option<&str> {
+    // features must start with a `+` or `-`.
+    let feature = s.strip_prefix(&['+', '-'][..]).unwrap_or_else(|| {
+        bug!("target feature `{}` must begin with a `+` or `-`", s);
+    });
+    // Rustc-specific feature requests like `+crt-static` or `-crt-static`
+    // are not passed down to GCC.
+    if RUSTC_SPECIFIC_FEATURES.contains(&feature) {
+        return None;
+    }
+    Some(feature)
+}
+
+// To find a list of GCC's names, check https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
+pub fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
+    let arch = if sess.target.arch == "x86_64" { "x86" } else { &*sess.target.arch };
+    match (arch, s) {
+        ("x86", "sse4.2") => smallvec!["sse4.2", "crc32"],
+        ("x86", "pclmulqdq") => smallvec!["pclmul"],
+        ("x86", "rdrand") => smallvec!["rdrnd"],
+        ("x86", "bmi1") => smallvec!["bmi"],
+        ("x86", "cmpxchg16b") => smallvec!["cx16"],
+        ("x86", "avx512vaes") => smallvec!["vaes"],
+        ("x86", "avx512gfni") => smallvec!["gfni"],
+        ("x86", "avx512vpclmulqdq") => smallvec!["vpclmulqdq"],
+        // NOTE: seems like GCC requires 'avx512bw' for 'avx512vbmi2'.
+        ("x86", "avx512vbmi2") => smallvec!["avx512vbmi2", "avx512bw"],
+        // NOTE: seems like GCC requires 'avx512bw' for 'avx512bitalg'.
+        ("x86", "avx512bitalg") => smallvec!["avx512bitalg", "avx512bw"],
+        ("aarch64", "rcpc2") => smallvec!["rcpc-immo"],
+        ("aarch64", "dpb") => smallvec!["ccpp"],
+        ("aarch64", "dpb2") => smallvec!["ccdp"],
+        ("aarch64", "frintts") => smallvec!["fptoint"],
+        ("aarch64", "fcma") => smallvec!["complxnum"],
+        ("aarch64", "pmuv3") => smallvec!["perfmon"],
+        ("aarch64", "paca") => smallvec!["pauth"],
+        ("aarch64", "pacg") => smallvec!["pauth"],
+        // Rust ties fp and neon together. In GCC neon implicitly enables fp,
+        // but we manually enable neon when a feature only implicitly enables fp
+        ("aarch64", "f32mm") => smallvec!["f32mm", "neon"],
+        ("aarch64", "f64mm") => smallvec!["f64mm", "neon"],
+        ("aarch64", "fhm") => smallvec!["fp16fml", "neon"],
+        ("aarch64", "fp16") => smallvec!["fullfp16", "neon"],
+        ("aarch64", "jsconv") => smallvec!["jsconv", "neon"],
+        ("aarch64", "sve") => smallvec!["sve", "neon"],
+        ("aarch64", "sve2") => smallvec!["sve2", "neon"],
+        ("aarch64", "sve2-aes") => smallvec!["sve2-aes", "neon"],
+        ("aarch64", "sve2-sm4") => smallvec!["sve2-sm4", "neon"],
+        ("aarch64", "sve2-sha3") => smallvec!["sve2-sha3", "neon"],
+        ("aarch64", "sve2-bitperm") => smallvec!["sve2-bitperm", "neon"],
+        (_, s) => smallvec![s],
+    }
+}
+
+// Given a map from target_features to whether they are enabled or disabled,
+// ensure only valid combinations are allowed.
+pub fn check_tied_features(sess: &Session, features: &FxHashMap<&str, bool>) -> Option<&'static [&'static str]> {
+    for tied in tied_target_features(sess) {
+        // Tied features must be set to the same value, or not set at all
+        let mut tied_iter = tied.iter();
+        let enabled = features.get(tied_iter.next().unwrap());
+        if tied_iter.any(|feature| enabled != features.get(feature)) {
+            return Some(tied);
+        }
+    }
+    None
+}
+
+fn handle_native(name: &str) -> &str {
+    if name != "native" {
+        return name;
+    }
+
+    #[cfg(feature="master")]
+    {
+        // Get the native arch.
+        let context = Context::default();
+        context.get_target_info().arch().unwrap()
+            .to_str()
+            .unwrap()
+    }
+    #[cfg(not(feature="master"))]
+    unimplemented!();
+}
+
+pub fn target_cpu(sess: &Session) -> &str {
+    match sess.opts.cg.target_cpu {
+        Some(ref name) => handle_native(name),
+        None => handle_native(sess.target.cpu.as_ref()),
+    }
+}
diff --git a/compiler/rustc_codegen_gcc/src/int.rs b/compiler/rustc_codegen_gcc/src/int.rs
index 0cf1204791d..58e0dd56f38 100644
--- a/compiler/rustc_codegen_gcc/src/int.rs
+++ b/compiler/rustc_codegen_gcc/src/int.rs
@@ -36,7 +36,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             self.cx.context.new_unary_op(None, operation, typ, a)
         }
         else {
-            // TODO(antoyo): use __negdi2 and __negti2 instead?
             let element_type = typ.dyncast_array().expect("element type");
             let values = [
                 self.cx.context.new_unary_op(None, UnaryOp::BitwiseNegate, element_type, self.low(a)),
@@ -52,9 +51,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             self.cx.context.new_unary_op(None, UnaryOp::Minus, a.get_type(), a)
         }
         else {
-            let param_a = self.context.new_parameter(None, a_type, "a");
-            let func = self.context.new_function(None, FunctionType::Extern, a_type, &[param_a], "__negti2", false);
-            self.context.new_call(None, func, &[a])
+            self.gcc_add(self.gcc_not(a), self.gcc_int(a_type, 1))
         }
     }
 
@@ -353,23 +350,63 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         (res.dereference(None).to_rvalue(), overflow)
     }
 
-    pub fn gcc_icmp(&self, op: IntPredicate, mut lhs: RValue<'gcc>, mut rhs: RValue<'gcc>) -> RValue<'gcc> {
+    pub fn gcc_icmp(&mut self, op: IntPredicate, mut lhs: RValue<'gcc>, mut rhs: RValue<'gcc>) -> RValue<'gcc> {
         let a_type = lhs.get_type();
         let b_type = rhs.get_type();
         if self.is_non_native_int_type(a_type) || self.is_non_native_int_type(b_type) {
-            let signed = a_type.is_compatible_with(self.i128_type);
-            let sign =
-                if signed {
-                    ""
-                }
-                else {
-                    "u"
-                };
-            let func_name = format!("__{}cmpti2", sign);
-            let param_a = self.context.new_parameter(None, a_type, "a");
-            let param_b = self.context.new_parameter(None, b_type, "b");
-            let func = self.context.new_function(None, FunctionType::Extern, self.int_type, &[param_a, param_b], func_name, false);
-            let cmp = self.context.new_call(None, func, &[lhs, rhs]);
+            // This algorithm is based on compiler-rt's __cmpti2:
+            // https://github.com/llvm-mirror/compiler-rt/blob/f0745e8476f069296a7c71accedd061dce4cdf79/lib/builtins/cmpti2.c#L21
+            let result = self.current_func().new_local(None, self.int_type, "icmp_result");
+            let block1 = self.current_func().new_block("block1");
+            let block2 = self.current_func().new_block("block2");
+            let block3 = self.current_func().new_block("block3");
+            let block4 = self.current_func().new_block("block4");
+            let block5 = self.current_func().new_block("block5");
+            let block6 = self.current_func().new_block("block6");
+            let block7 = self.current_func().new_block("block7");
+            let block8 = self.current_func().new_block("block8");
+            let after = self.current_func().new_block("after");
+
+            let native_int_type = a_type.dyncast_array().expect("get element type");
+            // NOTE: cast low to its unsigned type in order to perform a comparison correctly (e.g.
+            // the sign is only on high).
+            let unsigned_type = native_int_type.to_unsigned(&self.cx);
+
+            let lhs_low = self.context.new_cast(None, self.low(lhs), unsigned_type);
+            let rhs_low = self.context.new_cast(None, self.low(rhs), unsigned_type);
+
+            let condition = self.context.new_comparison(None, ComparisonOp::LessThan, self.high(lhs), self.high(rhs));
+            self.llbb().end_with_conditional(None, condition, block1, block2);
+
+            block1.add_assignment(None, result, self.context.new_rvalue_zero(self.int_type));
+            block1.end_with_jump(None, after);
+
+            let condition = self.context.new_comparison(None, ComparisonOp::GreaterThan, self.high(lhs), self.high(rhs));
+            block2.end_with_conditional(None, condition, block3, block4);
+
+            block3.add_assignment(None, result, self.context.new_rvalue_from_int(self.int_type, 2));
+            block3.end_with_jump(None, after);
+
+            let condition = self.context.new_comparison(None, ComparisonOp::LessThan, lhs_low, rhs_low);
+            block4.end_with_conditional(None, condition, block5, block6);
+
+            block5.add_assignment(None, result, self.context.new_rvalue_zero(self.int_type));
+            block5.end_with_jump(None, after);
+
+            let condition = self.context.new_comparison(None, ComparisonOp::GreaterThan, lhs_low, rhs_low);
+            block6.end_with_conditional(None, condition, block7, block8);
+
+            block7.add_assignment(None, result, self.context.new_rvalue_from_int(self.int_type, 2));
+            block7.end_with_jump(None, after);
+
+            block8.add_assignment(None, result, self.context.new_rvalue_one(self.int_type));
+            block8.end_with_jump(None, after);
+
+            // NOTE: since jumps were added in a place rustc does not expect, the current block in the
+            // state need to be updated.
+            self.switch_to_block(after);
+
+            let cmp = result.to_rvalue();
             let (op, limit) =
                 match op {
                     IntPredicate::IntEQ => {
@@ -546,7 +583,12 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
     }
 
     pub fn gcc_uint(&self, typ: Type<'gcc>, int: u64) -> RValue<'gcc> {
-        if self.is_native_int_type_or_bool(typ) {
+        if typ.is_u128(self) {
+            // FIXME(antoyo): libgccjit cannot create 128-bit values yet.
+            let num = self.context.new_rvalue_from_long(self.u64_type, int as i64);
+            self.gcc_int_cast(num, typ)
+        }
+        else if self.is_native_int_type_or_bool(typ) {
             self.context.new_rvalue_from_long(typ, u64::try_from(int).expect("u64::try_from") as i64)
         }
         else {
@@ -572,6 +614,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             }
         }
         else if typ.is_i128(self) {
+            // FIXME(antoyo): libgccjit cannot create 128-bit values yet.
             let num = self.context.new_rvalue_from_long(self.u64_type, num as u64 as i64);
             self.gcc_int_cast(num, typ)
         }
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
index 438eab78943..e01299d32fd 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
@@ -2254,6 +2254,42 @@ match name {
     "llvm.hexagon.prefetch" => "__builtin_HEXAGON_prefetch",
     "llvm.hexagon.vmemcpy" => "__builtin_hexagon_vmemcpy",
     "llvm.hexagon.vmemset" => "__builtin_hexagon_vmemset",
+    // loongarch
+    "llvm.loongarch.asrtgt.d" => "__builtin_loongarch_asrtgt_d",
+    "llvm.loongarch.asrtle.d" => "__builtin_loongarch_asrtle_d",
+    "llvm.loongarch.break" => "__builtin_loongarch_break",
+    "llvm.loongarch.cacop.d" => "__builtin_loongarch_cacop_d",
+    "llvm.loongarch.cacop.w" => "__builtin_loongarch_cacop_w",
+    "llvm.loongarch.cpucfg" => "__builtin_loongarch_cpucfg",
+    "llvm.loongarch.crc.w.b.w" => "__builtin_loongarch_crc_w_b_w",
+    "llvm.loongarch.crc.w.d.w" => "__builtin_loongarch_crc_w_d_w",
+    "llvm.loongarch.crc.w.h.w" => "__builtin_loongarch_crc_w_h_w",
+    "llvm.loongarch.crc.w.w.w" => "__builtin_loongarch_crc_w_w_w",
+    "llvm.loongarch.crcc.w.b.w" => "__builtin_loongarch_crcc_w_b_w",
+    "llvm.loongarch.crcc.w.d.w" => "__builtin_loongarch_crcc_w_d_w",
+    "llvm.loongarch.crcc.w.h.w" => "__builtin_loongarch_crcc_w_h_w",
+    "llvm.loongarch.crcc.w.w.w" => "__builtin_loongarch_crcc_w_w_w",
+    "llvm.loongarch.csrrd.d" => "__builtin_loongarch_csrrd_d",
+    "llvm.loongarch.csrrd.w" => "__builtin_loongarch_csrrd_w",
+    "llvm.loongarch.csrwr.d" => "__builtin_loongarch_csrwr_d",
+    "llvm.loongarch.csrwr.w" => "__builtin_loongarch_csrwr_w",
+    "llvm.loongarch.csrxchg.d" => "__builtin_loongarch_csrxchg_d",
+    "llvm.loongarch.csrxchg.w" => "__builtin_loongarch_csrxchg_w",
+    "llvm.loongarch.dbar" => "__builtin_loongarch_dbar",
+    "llvm.loongarch.ibar" => "__builtin_loongarch_ibar",
+    "llvm.loongarch.iocsrrd.b" => "__builtin_loongarch_iocsrrd_b",
+    "llvm.loongarch.iocsrrd.d" => "__builtin_loongarch_iocsrrd_d",
+    "llvm.loongarch.iocsrrd.h" => "__builtin_loongarch_iocsrrd_h",
+    "llvm.loongarch.iocsrrd.w" => "__builtin_loongarch_iocsrrd_w",
+    "llvm.loongarch.iocsrwr.b" => "__builtin_loongarch_iocsrwr_b",
+    "llvm.loongarch.iocsrwr.d" => "__builtin_loongarch_iocsrwr_d",
+    "llvm.loongarch.iocsrwr.h" => "__builtin_loongarch_iocsrwr_h",
+    "llvm.loongarch.iocsrwr.w" => "__builtin_loongarch_iocsrwr_w",
+    "llvm.loongarch.lddir.d" => "__builtin_loongarch_lddir_d",
+    "llvm.loongarch.ldpte.d" => "__builtin_loongarch_ldpte_d",
+    "llvm.loongarch.movfcsr2gr" => "__builtin_loongarch_movfcsr2gr",
+    "llvm.loongarch.movgr2fcsr" => "__builtin_loongarch_movgr2fcsr",
+    "llvm.loongarch.syscall" => "__builtin_loongarch_syscall",
     // mips
     "llvm.mips.absq.s.ph" => "__builtin_mips_absq_s_ph",
     "llvm.mips.absq.s.qb" => "__builtin_mips_absq_s_qb",
@@ -2954,6 +2990,8 @@ match name {
     "llvm.nvvm.barrier0.and" => "__nvvm_bar0_and",
     "llvm.nvvm.barrier0.or" => "__nvvm_bar0_or",
     "llvm.nvvm.barrier0.popc" => "__nvvm_bar0_popc",
+    "llvm.nvvm.bf2h.rn" => "__nvvm_bf2h_rn",
+    "llvm.nvvm.bf2h.rn.ftz" => "__nvvm_bf2h_rn_ftz",
     "llvm.nvvm.bitcast.d2ll" => "__nvvm_bitcast_d2ll",
     "llvm.nvvm.bitcast.f2i" => "__nvvm_bitcast_f2i",
     "llvm.nvvm.bitcast.i2f" => "__nvvm_bitcast_i2f",
@@ -3016,8 +3054,6 @@ match name {
     "llvm.nvvm.div.rz.ftz.f" => "__nvvm_div_rz_ftz_f",
     "llvm.nvvm.ex2.approx.d" => "__nvvm_ex2_approx_d",
     "llvm.nvvm.ex2.approx.f" => "__nvvm_ex2_approx_f",
-    "llvm.nvvm.ex2.approx.f16" => "__nvvm_ex2_approx_f16",
-    "llvm.nvvm.ex2.approx.f16x2" => "__nvvm_ex2_approx_f16x2",
     "llvm.nvvm.ex2.approx.ftz.f" => "__nvvm_ex2_approx_ftz_f",
     "llvm.nvvm.f2bf16.rn" => "__nvvm_f2bf16_rn",
     "llvm.nvvm.f2bf16.rn.relu" => "__nvvm_f2bf16_rn_relu",
@@ -3079,11 +3115,17 @@ match name {
     "llvm.nvvm.fma.rn.bf16x2" => "__nvvm_fma_rn_bf16x2",
     "llvm.nvvm.fma.rn.d" => "__nvvm_fma_rn_d",
     "llvm.nvvm.fma.rn.f" => "__nvvm_fma_rn_f",
-    "llvm.nvvm.fma.rn.f16" => "__nvvm_fma_rn_f16",
-    "llvm.nvvm.fma.rn.f16x2" => "__nvvm_fma_rn_f16x2",
+    "llvm.nvvm.fma.rn.ftz.bf16" => "__nvvm_fma_rn_ftz_bf16",
+    "llvm.nvvm.fma.rn.ftz.bf16x2" => "__nvvm_fma_rn_ftz_bf16x2",
     "llvm.nvvm.fma.rn.ftz.f" => "__nvvm_fma_rn_ftz_f",
+    "llvm.nvvm.fma.rn.ftz.relu.bf16" => "__nvvm_fma_rn_ftz_relu_bf16",
+    "llvm.nvvm.fma.rn.ftz.relu.bf16x2" => "__nvvm_fma_rn_ftz_relu_bf16x2",
+    "llvm.nvvm.fma.rn.ftz.sat.bf16" => "__nvvm_fma_rn_ftz_sat_bf16",
+    "llvm.nvvm.fma.rn.ftz.sat.bf16x2" => "__nvvm_fma_rn_ftz_sat_bf16x2",
     "llvm.nvvm.fma.rn.relu.bf16" => "__nvvm_fma_rn_relu_bf16",
     "llvm.nvvm.fma.rn.relu.bf16x2" => "__nvvm_fma_rn_relu_bf16x2",
+    "llvm.nvvm.fma.rn.sat.bf16" => "__nvvm_fma_rn_sat_bf16",
+    "llvm.nvvm.fma.rn.sat.bf16x2" => "__nvvm_fma_rn_sat_bf16x2",
     "llvm.nvvm.fma.rp.d" => "__nvvm_fma_rp_d",
     "llvm.nvvm.fma.rp.f" => "__nvvm_fma_rp_f",
     "llvm.nvvm.fma.rp.ftz.f" => "__nvvm_fma_rp_ftz_f",
@@ -3094,11 +3136,17 @@ match name {
     "llvm.nvvm.fmax.bf16x2" => "__nvvm_fmax_bf16x2",
     "llvm.nvvm.fmax.d" => "__nvvm_fmax_d",
     "llvm.nvvm.fmax.f" => "__nvvm_fmax_f",
-    "llvm.nvvm.fmax.f16" => "__nvvm_fmax_f16",
-    "llvm.nvvm.fmax.f16x2" => "__nvvm_fmax_f16x2",
+    "llvm.nvvm.fmax.ftz.bf16" => "__nvvm_fmax_ftz_bf16",
+    "llvm.nvvm.fmax.ftz.bf16x2" => "__nvvm_fmax_ftz_bf16x2",
     "llvm.nvvm.fmax.ftz.f" => "__nvvm_fmax_ftz_f",
+    "llvm.nvvm.fmax.ftz.nan.bf16" => "__nvvm_fmax_ftz_nan_bf16",
+    "llvm.nvvm.fmax.ftz.nan.bf16x2" => "__nvvm_fmax_ftz_nan_bf16x2",
     "llvm.nvvm.fmax.ftz.nan.f" => "__nvvm_fmax_ftz_nan_f",
+    "llvm.nvvm.fmax.ftz.nan.xorsign.abs.bf16" => "__nvvm_fmax_ftz_nan_xorsign_abs_bf16",
+    "llvm.nvvm.fmax.ftz.nan.xorsign.abs.bf16x2" => "__nvvm_fmax_ftz_nan_xorsign_abs_bf16x2",
     "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f" => "__nvvm_fmax_ftz_nan_xorsign_abs_f",
+    "llvm.nvvm.fmax.ftz.xorsign.abs.bf16" => "__nvvm_fmax_ftz_xorsign_abs_bf16",
+    "llvm.nvvm.fmax.ftz.xorsign.abs.bf16x2" => "__nvvm_fmax_ftz_xorsign_abs_bf16x2",
     "llvm.nvvm.fmax.ftz.xorsign.abs.f" => "__nvvm_fmax_ftz_xorsign_abs_f",
     "llvm.nvvm.fmax.nan.bf16" => "__nvvm_fmax_nan_bf16",
     "llvm.nvvm.fmax.nan.bf16x2" => "__nvvm_fmax_nan_bf16x2",
@@ -3113,11 +3161,17 @@ match name {
     "llvm.nvvm.fmin.bf16x2" => "__nvvm_fmin_bf16x2",
     "llvm.nvvm.fmin.d" => "__nvvm_fmin_d",
     "llvm.nvvm.fmin.f" => "__nvvm_fmin_f",
-    "llvm.nvvm.fmin.f16" => "__nvvm_fmin_f16",
-    "llvm.nvvm.fmin.f16x2" => "__nvvm_fmin_f16x2",
+    "llvm.nvvm.fmin.ftz.bf16" => "__nvvm_fmin_ftz_bf16",
+    "llvm.nvvm.fmin.ftz.bf16x2" => "__nvvm_fmin_ftz_bf16x2",
     "llvm.nvvm.fmin.ftz.f" => "__nvvm_fmin_ftz_f",
+    "llvm.nvvm.fmin.ftz.nan.bf16" => "__nvvm_fmin_ftz_nan_bf16",
+    "llvm.nvvm.fmin.ftz.nan.bf16x2" => "__nvvm_fmin_ftz_nan_bf16x2",
     "llvm.nvvm.fmin.ftz.nan.f" => "__nvvm_fmin_ftz_nan_f",
+    "llvm.nvvm.fmin.ftz.nan.xorsign.abs.bf16" => "__nvvm_fmin_ftz_nan_xorsign_abs_bf16",
+    "llvm.nvvm.fmin.ftz.nan.xorsign.abs.bf16x2" => "__nvvm_fmin_ftz_nan_xorsign_abs_bf16x2",
     "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f" => "__nvvm_fmin_ftz_nan_xorsign_abs_f",
+    "llvm.nvvm.fmin.ftz.xorsign.abs.bf16" => "__nvvm_fmin_ftz_xorsign_abs_bf16",
+    "llvm.nvvm.fmin.ftz.xorsign.abs.bf16x2" => "__nvvm_fmin_ftz_xorsign_abs_bf16x2",
     "llvm.nvvm.fmin.ftz.xorsign.abs.f" => "__nvvm_fmin_ftz_xorsign_abs_f",
     "llvm.nvvm.fmin.nan.bf16" => "__nvvm_fmin_nan_bf16",
     "llvm.nvvm.fmin.nan.bf16x2" => "__nvvm_fmin_nan_bf16x2",
@@ -4213,6 +4267,28 @@ match name {
     "llvm.r600.read.tgid.x" => "__builtin_r600_read_tgid_x",
     "llvm.r600.read.tgid.y" => "__builtin_r600_read_tgid_y",
     "llvm.r600.read.tgid.z" => "__builtin_r600_read_tgid_z",
+    // riscv
+    "llvm.riscv.aes32dsi" => "__builtin_riscv_aes32dsi",
+    "llvm.riscv.aes32dsmi" => "__builtin_riscv_aes32dsmi",
+    "llvm.riscv.aes32esi" => "__builtin_riscv_aes32esi",
+    "llvm.riscv.aes32esmi" => "__builtin_riscv_aes32esmi",
+    "llvm.riscv.aes64ds" => "__builtin_riscv_aes64ds",
+    "llvm.riscv.aes64dsm" => "__builtin_riscv_aes64dsm",
+    "llvm.riscv.aes64es" => "__builtin_riscv_aes64es",
+    "llvm.riscv.aes64esm" => "__builtin_riscv_aes64esm",
+    "llvm.riscv.aes64im" => "__builtin_riscv_aes64im",
+    "llvm.riscv.aes64ks1i" => "__builtin_riscv_aes64ks1i",
+    "llvm.riscv.aes64ks2" => "__builtin_riscv_aes64ks2",
+    "llvm.riscv.sha512sig0" => "__builtin_riscv_sha512sig0",
+    "llvm.riscv.sha512sig0h" => "__builtin_riscv_sha512sig0h",
+    "llvm.riscv.sha512sig0l" => "__builtin_riscv_sha512sig0l",
+    "llvm.riscv.sha512sig1" => "__builtin_riscv_sha512sig1",
+    "llvm.riscv.sha512sig1h" => "__builtin_riscv_sha512sig1h",
+    "llvm.riscv.sha512sig1l" => "__builtin_riscv_sha512sig1l",
+    "llvm.riscv.sha512sum0" => "__builtin_riscv_sha512sum0",
+    "llvm.riscv.sha512sum0r" => "__builtin_riscv_sha512sum0r",
+    "llvm.riscv.sha512sum1" => "__builtin_riscv_sha512sum1",
+    "llvm.riscv.sha512sum1r" => "__builtin_riscv_sha512sum1r",
     // s390
     "llvm.s390.efpc" => "__builtin_s390_efpc",
     "llvm.s390.etnd" => "__builtin_tx_nesting_depth",
@@ -5912,6 +5988,18 @@ match name {
     "llvm.x86.avx2.vpdpbuud.256" => "__builtin_ia32_vpdpbuud256",
     "llvm.x86.avx2.vpdpbuuds.128" => "__builtin_ia32_vpdpbuuds128",
     "llvm.x86.avx2.vpdpbuuds.256" => "__builtin_ia32_vpdpbuuds256",
+    "llvm.x86.avx2.vpdpwsud.128" => "__builtin_ia32_vpdpwsud128",
+    "llvm.x86.avx2.vpdpwsud.256" => "__builtin_ia32_vpdpwsud256",
+    "llvm.x86.avx2.vpdpwsuds.128" => "__builtin_ia32_vpdpwsuds128",
+    "llvm.x86.avx2.vpdpwsuds.256" => "__builtin_ia32_vpdpwsuds256",
+    "llvm.x86.avx2.vpdpwusd.128" => "__builtin_ia32_vpdpwusd128",
+    "llvm.x86.avx2.vpdpwusd.256" => "__builtin_ia32_vpdpwusd256",
+    "llvm.x86.avx2.vpdpwusds.128" => "__builtin_ia32_vpdpwusds128",
+    "llvm.x86.avx2.vpdpwusds.256" => "__builtin_ia32_vpdpwusds256",
+    "llvm.x86.avx2.vpdpwuud.128" => "__builtin_ia32_vpdpwuud128",
+    "llvm.x86.avx2.vpdpwuud.256" => "__builtin_ia32_vpdpwuud256",
+    "llvm.x86.avx2.vpdpwuuds.128" => "__builtin_ia32_vpdpwuuds128",
+    "llvm.x86.avx2.vpdpwuuds.256" => "__builtin_ia32_vpdpwuuds256",
     "llvm.x86.avx2.vperm2i128" => "__builtin_ia32_permti256",
     "llvm.x86.avx512.add.pd.512" => "__builtin_ia32_addpd512",
     "llvm.x86.avx512.add.ps.512" => "__builtin_ia32_addps512",
@@ -7909,6 +7997,16 @@ match name {
     "llvm.x86.vgf2p8mulb.128" => "__builtin_ia32_vgf2p8mulb_v16qi",
     "llvm.x86.vgf2p8mulb.256" => "__builtin_ia32_vgf2p8mulb_v32qi",
     "llvm.x86.vgf2p8mulb.512" => "__builtin_ia32_vgf2p8mulb_v64qi",
+    "llvm.x86.vsha512msg1" => "__builtin_ia32_vsha512msg1",
+    "llvm.x86.vsha512msg2" => "__builtin_ia32_vsha512msg2",
+    "llvm.x86.vsha512rnds2" => "__builtin_ia32_vsha512rnds2",
+    "llvm.x86.vsm3msg1" => "__builtin_ia32_vsm3msg1",
+    "llvm.x86.vsm3msg2" => "__builtin_ia32_vsm3msg2",
+    "llvm.x86.vsm3rnds2" => "__builtin_ia32_vsm3rnds2",
+    "llvm.x86.vsm4key4128" => "__builtin_ia32_vsm4key4128",
+    "llvm.x86.vsm4key4256" => "__builtin_ia32_vsm4key4256",
+    "llvm.x86.vsm4rnds4128" => "__builtin_ia32_vsm4rnds4128",
+    "llvm.x86.vsm4rnds4256" => "__builtin_ia32_vsm4rnds4256",
     "llvm.x86.wbinvd" => "__builtin_ia32_wbinvd",
     "llvm.x86.wbnoinvd" => "__builtin_ia32_wbnoinvd",
     "llvm.x86.wrfsbase.32" => "__builtin_ia32_wrfsbase32",
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
index f28348380d7..5996623bdc5 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
@@ -236,11 +236,17 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
                 let arg2 = builder.context.new_cast(None, arg2, arg2_type);
                 args = vec![new_args[0], arg2].into();
             },
+            // These builtins are sent one more argument than needed.
             "__builtin_prefetch" => {
                 let mut new_args = args.to_vec();
                 new_args.pop();
                 args = new_args.into();
             },
+            // The GCC version returns one value of the tuple through a pointer.
+            "__builtin_ia32_rdrand64_step" => {
+                let arg = builder.current_func().new_local(None, builder.ulonglong_type, "return_rdrand_arg");
+                args = vec![arg.get_address(None)].into();
+            },
             _ => (),
         }
     }
@@ -361,6 +367,19 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc,
             // builtin twice, we overwrite the return value with a dummy value.
             return_value = builder.context.new_rvalue_zero(builder.int_type);
         },
+        "__builtin_ia32_rdrand64_step" => {
+            let random_number = args[0].dereference(None).to_rvalue();
+            let success_variable = builder.current_func().new_local(None, return_value.get_type(), "success");
+            builder.llbb().add_assignment(None, success_variable, return_value);
+
+            let field1 = builder.context.new_field(None, random_number.get_type(), "random_number");
+            let field2 = builder.context.new_field(None, return_value.get_type(), "success");
+            let struct_type = builder.context.new_struct_type(None, "rdrand_result", &[field1, field2]);
+            return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[
+                random_number,
+                success_variable.to_rvalue(),
+            ]);
+        },
         _ => (),
     }
 
@@ -613,6 +632,7 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
         "llvm.fshr.v8i16" => "__builtin_ia32_vpshrdv_v8hi",
         "llvm.x86.fma.vfmadd.sd" => "__builtin_ia32_vfmaddsd3",
         "llvm.x86.fma.vfmadd.ss" => "__builtin_ia32_vfmaddss3",
+        "llvm.x86.rdrand.64" => "__builtin_ia32_rdrand64_step",
 
         // The above doc points to unknown builtins for the following, so override them:
         "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index 68a087a1d7f..9caed459a29 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -10,9 +10,9 @@ use rustc_codegen_ssa::base::wants_msvc_seh;
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::PlaceRef;
-use rustc_codegen_ssa::traits::{ArgAbiMethods, BaseTypeMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
+use rustc_codegen_ssa::traits::{ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
 #[cfg(feature="master")]
-use rustc_codegen_ssa::traits::MiscMethods;
+use rustc_codegen_ssa::traits::{BaseTypeMethods, MiscMethods};
 use rustc_codegen_ssa::errors::InvalidMonomorphization;
 use rustc_middle::bug;
 use rustc_middle::ty::{self, Instance, Ty};
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index ce7e31682f1..fe233930560 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -2,6 +2,12 @@
  * TODO(antoyo): implement equality in libgccjit based on https://zpz.github.io/blog/overloading-equality-operator-in-cpp-class-hierarchy/ (for type equality?)
  * TODO(antoyo): support #[inline] attributes.
  * TODO(antoyo): support LTO (gcc's equivalent to Full LTO is -flto -flto-partition=one — https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html).
+ * For Thin LTO, this might be helpful:
+ * In gcc 4.6 -fwhopr was removed and became default with -flto. The non-whopr path can still be executed via -flto-partition=none.
+ *
+ * Maybe some missing optizations enabled by rustc's LTO is in there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
+ * Like -fipa-icf (should be already enabled) and maybe -fdevirtualize-at-ltrans.
+ * TODO: disable debug info always being emitted. Perhaps this slows down things?
  *
  * TODO(antoyo): remove the patches.
  */
@@ -28,6 +34,7 @@ extern crate rustc_codegen_ssa;
 extern crate rustc_data_structures;
 extern crate rustc_errors;
 extern crate rustc_fluent_macro;
+extern crate rustc_fs_util;
 extern crate rustc_hir;
 extern crate rustc_macros;
 extern crate rustc_metadata;
@@ -35,7 +42,8 @@ extern crate rustc_middle;
 extern crate rustc_session;
 extern crate rustc_span;
 extern crate rustc_target;
-extern crate tempfile;
+#[macro_use]
+extern crate tracing;
 
 // This prevents duplicating functions and statics that are already part of the host rustc process.
 #[allow(unused_extern_crates)]
@@ -57,6 +65,7 @@ mod coverageinfo;
 mod debuginfo;
 mod declare;
 mod errors;
+mod gcc_util;
 mod int;
 mod intrinsic;
 mod mono_item;
@@ -64,18 +73,27 @@ mod type_;
 mod type_of;
 
 use std::any::Any;
-use std::sync::{Arc, Mutex};
-
-use crate::errors::LTONotSupported;
-use gccjit::{Context, OptimizationLevel, CType};
+use std::sync::Arc;
+use std::sync::Mutex;
+#[cfg(not(feature="master"))]
+use std::sync::atomic::AtomicBool;
+#[cfg(not(feature="master"))]
+use std::sync::atomic::Ordering;
+
+use gccjit::{Context, OptimizationLevel};
+#[cfg(feature="master")]
+use gccjit::TargetInfo;
+#[cfg(not(feature="master"))]
+use gccjit::CType;
+use errors::LTONotSupported;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
 use rustc_codegen_ssa::base::codegen_crate;
 use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn};
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
 use rustc_codegen_ssa::target_features::supported_target_features;
-use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ModuleBufferMethods, ThinBufferMethods, WriteBackendMethods};
 use rustc_data_structures::fx::FxIndexMap;
+use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ThinBufferMethods, WriteBackendMethods};
 use rustc_errors::{DiagnosticMessage, ErrorGuaranteed, Handler, SubdiagnosticMessage};
 use rustc_fluent_macro::fluent_messages;
 use rustc_metadata::EncodedMetadata;
@@ -88,6 +106,9 @@ use rustc_span::Symbol;
 use rustc_span::fatal_error::FatalError;
 use tempfile::TempDir;
 
+use crate::back::lto::ModuleBuffer;
+use crate::gcc_util::target_cpu;
+
 fluent_messages! { "../messages.ftl" }
 
 pub struct PrintOnPanic<F: Fn() -> String>(pub F);
@@ -100,9 +121,41 @@ impl<F: Fn() -> String> Drop for PrintOnPanic<F> {
     }
 }
 
+#[cfg(not(feature="master"))]
+#[derive(Debug)]
+pub struct TargetInfo {
+    supports_128bit_integers: AtomicBool,
+}
+
+#[cfg(not(feature="master"))]
+impl TargetInfo {
+    fn cpu_supports(&self, _feature: &str) -> bool {
+        false
+    }
+
+    fn supports_128bit_int(&self) -> bool {
+        self.supports_128bit_integers.load(Ordering::SeqCst)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct LockedTargetInfo {
+    info: Arc<Mutex<TargetInfo>>,
+}
+
+impl LockedTargetInfo {
+    fn cpu_supports(&self, feature: &str) -> bool {
+        self.info.lock().expect("lock").cpu_supports(feature)
+    }
+
+    fn supports_128bit_int(&self) -> bool {
+        self.info.lock().expect("lock").supports_128bit_int()
+    }
+}
+
 #[derive(Clone)]
 pub struct GccCodegenBackend {
-    supports_128bit_integers: Arc<Mutex<bool>>,
+    target_info: LockedTargetInfo,
 }
 
 impl CodegenBackend for GccCodegenBackend {
@@ -112,24 +165,40 @@ impl CodegenBackend for GccCodegenBackend {
 
     fn init(&self, sess: &Session) {
         #[cfg(feature="master")]
+        {
+            let target_cpu = target_cpu(sess);
+
+            // Get the second TargetInfo with the correct CPU features by setting the arch.
+            let context = Context::default();
+            if target_cpu != "generic" {
+                context.add_command_line_option(&format!("-march={}", target_cpu));
+            }
+
+            *self.target_info.info.lock().expect("lock") = context.get_target_info();
+        }
+
+        #[cfg(feature="master")]
         gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
-        if sess.lto() != Lto::No {
+        if sess.lto() == Lto::Thin {
             sess.emit_warning(LTONotSupported {});
         }
 
-        let temp_dir = TempDir::new().expect("cannot create temporary directory");
-        let temp_file = temp_dir.into_path().join("result.asm");
-        let check_context = Context::default();
-        check_context.set_print_errors_to_stderr(false);
-        let _int128_ty = check_context.new_c_type(CType::UInt128t);
-        // NOTE: we cannot just call compile() as this would require other files than libgccjit.so.
-        check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str"));
-        *self.supports_128bit_integers.lock().expect("lock") = check_context.get_last_error() == Ok(None);
+        #[cfg(not(feature="master"))]
+        {
+            let temp_dir = TempDir::new().expect("cannot create temporary directory");
+            let temp_file = temp_dir.into_path().join("result.asm");
+            let check_context = Context::default();
+            check_context.set_print_errors_to_stderr(false);
+            let _int128_ty = check_context.new_c_type(CType::UInt128t);
+            // NOTE: we cannot just call compile() as this would require other files than libgccjit.so.
+            check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str"));
+            self.target_info.info.lock().expect("lock").supports_128bit_integers.store(check_context.get_last_error() == Ok(None), Ordering::SeqCst);
+        }
     }
 
     fn provide(&self, providers: &mut Providers) {
-        // FIXME(antoyo) compute list of enabled features from cli flags
-        providers.global_backend_features = |_tcx, ()| vec![];
+        providers.global_backend_features =
+            |tcx, ()| gcc_util::global_gcc_features(tcx.sess, true)
     }
 
     fn codegen_crate<'tcx>(&self, tcx: TyCtxt<'tcx>, metadata: EncodedMetadata, need_metadata_module: bool) -> Box<dyn Any> {
@@ -160,7 +229,7 @@ impl CodegenBackend for GccCodegenBackend {
     }
 
     fn target_features(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
-        target_features(sess, allow_unstable)
+        target_features(sess, allow_unstable, &self.target_info)
     }
 }
 
@@ -168,13 +237,18 @@ impl ExtraBackendMethods for GccCodegenBackend {
     fn codegen_allocator<'tcx>(&self, tcx: TyCtxt<'tcx>, module_name: &str, kind: AllocatorKind, alloc_error_handler_kind: AllocatorKind) -> Self::Module {
         let mut mods = GccContext {
             context: Context::default(),
+            should_combine_object_files: false,
+            temp_dir: None,
         };
+
+        // TODO(antoyo): only set for x86.
+        mods.context.add_command_line_option("-masm=intel");
         unsafe { allocator::codegen(tcx, &mut mods, module_name, kind, alloc_error_handler_kind); }
         mods
     }
 
     fn compile_codegen_unit(&self, tcx: TyCtxt<'_>, cgu_name: Symbol) -> (ModuleCodegen<Self::Module>, u64) {
-        base::compile_codegen_unit(tcx, cgu_name, *self.supports_128bit_integers.lock().expect("lock"))
+        base::compile_codegen_unit(tcx, cgu_name, self.target_info.clone())
     }
 
     fn target_machine_factory(&self, _sess: &Session, _opt_level: OptLevel, _features: &[String]) -> TargetMachineFactoryFn<Self> {
@@ -185,14 +259,6 @@ impl ExtraBackendMethods for GccCodegenBackend {
     }
 }
 
-pub struct ModuleBuffer;
-
-impl ModuleBufferMethods for ModuleBuffer {
-    fn data(&self) -> &[u8] {
-        unimplemented!();
-    }
-}
-
 pub struct ThinBuffer;
 
 impl ThinBufferMethods for ThinBuffer {
@@ -203,6 +269,9 @@ impl ThinBufferMethods for ThinBuffer {
 
 pub struct GccContext {
     context: Context<'static>,
+    should_combine_object_files: bool,
+    // Temporary directory used by LTO. We keep it here so that it's not removed before linking.
+    temp_dir: Option<TempDir>,
 }
 
 unsafe impl Send for GccContext {}
@@ -217,18 +286,8 @@ impl WriteBackendMethods for GccCodegenBackend {
     type ThinData = ();
     type ThinBuffer = ThinBuffer;
 
-    fn run_fat_lto(_cgcx: &CodegenContext<Self>, mut modules: Vec<FatLtoInput<Self>>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> {
-        // TODO(antoyo): implement LTO by sending -flto to libgccjit and adding the appropriate gcc linker plugins.
-        // NOTE: implemented elsewhere.
-        // TODO(antoyo): what is implemented elsewhere ^ ?
-        let module =
-            match modules.remove(0) {
-                FatLtoInput::InMemory(module) => module,
-                FatLtoInput::Serialized { .. } => {
-                    unimplemented!();
-                }
-            };
-        Ok(LtoModuleCodegen::Fat { module, _serialized_bitcode: vec![] })
+    fn run_fat_lto(cgcx: &CodegenContext<Self>, modules: Vec<FatLtoInput<Self>>, cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> {
+        back::lto::run_fat(cgcx, modules, cached_modules)
     }
 
     fn run_thin_lto(_cgcx: &CodegenContext<Self>, _modules: Vec<(String, Self::ThinBuffer)>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
@@ -277,8 +336,19 @@ impl WriteBackendMethods for GccCodegenBackend {
 /// This is the entrypoint for a hot plugged rustc_codegen_gccjit
 #[no_mangle]
 pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
+    #[cfg(feature="master")]
+    let info = {
+        // Check whether the target supports 128-bit integers.
+        let context = Context::default();
+        Arc::new(Mutex::new(context.get_target_info()))
+    };
+    #[cfg(not(feature="master"))]
+    let info = Arc::new(Mutex::new(TargetInfo {
+        supports_128bit_integers: AtomicBool::new(false),
+    }));
+
     Box::new(GccCodegenBackend {
-        supports_128bit_integers: Arc::new(Mutex::new(false)),
+        target_info: LockedTargetInfo { info },
     })
 }
 
@@ -297,22 +367,7 @@ fn to_gcc_opt_level(optlevel: Option<OptLevel>) -> OptimizationLevel {
     }
 }
 
-fn handle_native(name: &str) -> &str {
-    if name != "native" {
-        return name;
-    }
-
-    unimplemented!();
-}
-
-pub fn target_cpu(sess: &Session) -> &str {
-    match sess.opts.cg.target_cpu {
-        Some(ref name) => handle_native(name),
-        None => handle_native(sess.target.cpu.as_ref()),
-    }
-}
-
-pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
+pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &LockedTargetInfo) -> Vec<Symbol> {
     supported_target_features(sess)
         .iter()
         .filter_map(
@@ -321,26 +376,13 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
             },
         )
         .filter(|_feature| {
-            // TODO(antoyo): implement a way to get enabled feature in libgccjit.
-            // Probably using the equivalent of __builtin_cpu_supports.
-            // TODO(antoyo): maybe use whatever outputs the following command:
-            // gcc -march=native -Q --help=target
-            #[cfg(feature="master")]
-            {
-                // NOTE: the CPU in the CI doesn't support sse4a, so disable it to make the stdarch tests pass in the CI.
-                (_feature.contains("sse") || _feature.contains("avx")) && !_feature.contains("avx512") && !_feature.contains("sse4a")
-            }
-            #[cfg(not(feature="master"))]
-            {
-                false
-            }
+            target_info.cpu_supports(_feature)
             /*
                adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512ifma,
                avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
                bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
                sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
              */
-            //false
         })
         .map(|feature| Symbol::intern(feature))
         .collect()
diff --git a/compiler/rustc_codegen_gcc/src/type_of.rs b/compiler/rustc_codegen_gcc/src/type_of.rs
index cc467801beb..c2eab295acd 100644
--- a/compiler/rustc_codegen_gcc/src/type_of.rs
+++ b/compiler/rustc_codegen_gcc/src/type_of.rs
@@ -182,6 +182,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
     /// of that field's type - this is useful for taking the address of
     /// that field and ensuring the struct has the right alignment.
     fn gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc> {
+        use crate::rustc_middle::ty::layout::FnAbiOf;
         // This must produce the same result for `repr(transparent)` wrappers as for the inner type!
         // In other words, this should generally not look at the type at all, but only at the
         // layout.
@@ -191,7 +192,14 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
             if let Some(&ty) = cx.scalar_types.borrow().get(&self.ty) {
                 return ty;
             }
-            let ty = self.scalar_gcc_type_at(cx, scalar, Size::ZERO);
+            let ty =
+                match *self.ty.kind() {
+                    // NOTE: we cannot remove this match like in the LLVM codegen because the call
+                    // to fn_ptr_backend_type handle the on-stack attribute.
+                    // TODO(antoyo): find a less hackish way to hande the on-stack attribute.
+                    ty::FnPtr(sig) => cx.fn_ptr_backend_type(&cx.fn_abi_of_fn_ptr(sig, ty::List::empty())),
+                    _ => self.scalar_gcc_type_at(cx, scalar, Size::ZERO),
+                };
             cx.scalar_types.borrow_mut().insert(self.ty, ty);
             return ty;
         }
author	Antoni Boucher <bouanto@zoho.com>	2023-10-09 15:53:34 -0400
committer	Antoni Boucher <bouanto@zoho.com>	2023-10-09 15:53:34 -0400
commit	30290c8b411a837b2e19293391956f5f779608ab (patch)
tree	0a59bbb2fda11bca09f135f56ea3025c5daea590 /compiler/rustc_codegen_gcc/src
parent	cdddcd3bea35049dab56888d4391cb9b5b1b4491 (diff)
parent	11a0cceab966e5ff1058ddbcab5977e8a1d6d290 (diff)
download	rust-30290c8b411a837b2e19293391956f5f779608ab.tar.gz rust-30290c8b411a837b2e19293391956f5f779608ab.zip