author    Antoni Boucher <bouanto@zoho.com>    2023-06-19 18:51:02 -0400
committer Antoni Boucher <bouanto@zoho.com>    2023-06-19 18:51:02 -0400
commit    4d96893d853a261e5d59c860108c3ddd3872c544
tree      5d2c37aea2f88c37d1b8b926456703652719fcda /compiler/rustc_codegen_gcc/src
parent    405130538938e2d63a761b7d8d6b27d26e634c33
parent    1bbee3e217d75e7bc3bfe5d8c1b35e776fce96e6
Merge commit '1bbee3e217d75e7bc3bfe5d8c1b35e776fce96e6' into sync-cg_gcc-2023-06-19
Diffstat (limited to 'compiler/rustc_codegen_gcc/src')
-rw-r--r--  compiler/rustc_codegen_gcc/src/asm.rs              |   1
-rw-r--r--  compiler/rustc_codegen_gcc/src/attributes.rs       |  39
-rw-r--r--  compiler/rustc_codegen_gcc/src/builder.rs          |  72
-rw-r--r--  compiler/rustc_codegen_gcc/src/consts.rs           |   5
-rw-r--r--  compiler/rustc_codegen_gcc/src/declare.rs          |   2
-rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/archs.rs  |  46
-rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs   |   7
-rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/mod.rs    | 159
-rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/simd.rs   |  18
-rw-r--r--  compiler/rustc_codegen_gcc/src/lib.rs              |   2
-rw-r--r--  compiler/rustc_codegen_gcc/src/type_of.rs          |   6
11 files changed, 158 insertions(+), 199 deletions(-)
diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs
index 250aa79f8d6..4c3b7f5036c 100644
--- a/compiler/rustc_codegen_gcc/src/asm.rs
+++ b/compiler/rustc_codegen_gcc/src/asm.rs
@@ -518,7 +518,6 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
                 OperandValue::Immediate(op.tmp_var.to_rvalue()).store(self, place);
             }
         }
-
     }
 }
 
diff --git a/compiler/rustc_codegen_gcc/src/attributes.rs b/compiler/rustc_codegen_gcc/src/attributes.rs
index db841b1b524..eb0cce19b85 100644
--- a/compiler/rustc_codegen_gcc/src/attributes.rs
+++ b/compiler/rustc_codegen_gcc/src/attributes.rs
@@ -2,9 +2,13 @@
 use gccjit::FnAttribute;
 use gccjit::Function;
 use rustc_attr::InstructionSetAttr;
+#[cfg(feature="master")]
+use rustc_attr::InlineAttr;
 use rustc_codegen_ssa::target_features::tied_target_features;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_middle::ty;
+#[cfg(feature="master")]
+use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_session::Session;
 use rustc_span::symbol::sym;
 use smallvec::{smallvec, SmallVec};
@@ -67,6 +71,24 @@ fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
     }
 }
 
+/// Get GCC attribute for the provided inline heuristic.
+#[cfg(feature="master")]
+#[inline]
+fn inline_attr<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, inline: InlineAttr) -> Option<FnAttribute<'gcc>> {
+    match inline {
+        InlineAttr::Hint => Some(FnAttribute::Inline),
+        InlineAttr::Always => Some(FnAttribute::AlwaysInline),
+        InlineAttr::Never => {
+            if cx.sess().target.arch != "amdgpu" {
+                Some(FnAttribute::NoInline)
+            } else {
+                None
+            }
+        }
+        InlineAttr::None => None,
+    }
+}
+
 /// Composite function which sets GCC attributes for function depending on its AST (`#[attribute]`)
 /// attributes.
 pub fn from_fn_attrs<'gcc, 'tcx>(
@@ -77,6 +99,23 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
 ) {
     let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id());
 
+    #[cfg(feature="master")]
+    {
+        let inline =
+            if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
+                InlineAttr::Never
+            }
+            else if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
+                InlineAttr::Hint
+            }
+            else {
+                codegen_fn_attrs.inline
+            };
+        if let Some(attr) = inline_attr(cx, inline) {
+            func.add_attribute(attr);
+        }
+    }
+
     let function_features =
         codegen_fn_attrs.target_features.iter().map(|features| features.as_str()).collect::<Vec<&str>>();
 
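Aside: a minimal standalone sketch (plain Rust, not part of the patch) of the inline-attribute precedence the attributes.rs hunks above implement. `is_naked`, `requires_inline` and `user_attr` are illustrative stand-ins for the CodegenFnAttrFlags::NAKED flag, instance.def.requires_inline() and codegen_fn_attrs.inline used in the real code:

    // Hypothetical mirror of the decision made in from_fn_attrs above.
    #[derive(Clone, Copy, PartialEq)]
    enum InlineAttr { None, Hint, Always, Never }

    fn effective_inline(is_naked: bool, requires_inline: bool, user_attr: InlineAttr) -> InlineAttr {
        if is_naked {
            // Naked functions must never be inlined.
            InlineAttr::Never
        } else if user_attr == InlineAttr::None && requires_inline {
            // No explicit attribute, but rustc wants this instance inlined (e.g. shims).
            InlineAttr::Hint
        } else {
            user_attr
        }
    }

The result then maps to FnAttribute::Inline / AlwaysInline / NoInline via inline_attr, with NoInline skipped on amdgpu as shown above.
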
diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
index f9ea0f00456..43d0aafbd50 100644
--- a/compiler/rustc_codegen_gcc/src/builder.rs
+++ b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -181,6 +181,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             })
             .collect();
 
+        debug_assert_eq!(casted_args.len(), args.len());
+
         Cow::Owned(casted_args)
     }
 
@@ -207,7 +209,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
         let func_name = format!("{:?}", func_ptr);
 
-        let casted_args: Vec<_> = param_types
+        let mut casted_args: Vec<_> = param_types
             .into_iter()
             .zip(args.iter())
             .enumerate()
@@ -237,6 +239,11 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             })
             .collect();
 
+        // NOTE: pass the remaining arguments unchanged, to take variadic functions into account.
+        for i in casted_args.len()..args.len() {
+            casted_args.push(args[i]);
+        }
+
         Cow::Owned(casted_args)
     }
 
@@ -280,8 +287,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         }
     }
 
-    fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
-        let gcc_func = func_ptr.get_type().dyncast_function_ptr_type().expect("function ptr");
+    fn function_ptr_call(&mut self, typ: Type<'gcc>, mut func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
+        let gcc_func =
+            match func_ptr.get_type().dyncast_function_ptr_type() {
+                Some(func) => func,
+                None => {
+                    // NOTE: due to opaque pointers now being used, we need to cast here.
+                    let new_func_type = typ.dyncast_function_ptr_type().expect("function ptr");
+                    func_ptr = self.context.new_cast(None, func_ptr, typ);
+                    new_func_type
+                },
+            };
         let func_name = format!("{:?}", func_ptr);
         let previous_arg_count = args.len();
         let orig_args = args;
@@ -424,16 +440,17 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         self.llbb().end_with_void_return(None)
     }
 
-    fn ret(&mut self, value: RValue<'gcc>) {
-        let value =
-            if self.structs_as_pointer.borrow().contains(&value) {
-                // NOTE: hack to workaround a limitation of the rustc API: see comment on
-                // CodegenCx.structs_as_pointer
-                value.dereference(None).to_rvalue()
-            }
-            else {
-                value
-            };
+    fn ret(&mut self, mut value: RValue<'gcc>) {
+        if self.structs_as_pointer.borrow().contains(&value) {
+            // NOTE: hack to workaround a limitation of the rustc API: see comment on
+            // CodegenCx.structs_as_pointer
+            value = value.dereference(None).to_rvalue();
+        }
+        let expected_return_type = self.current_func().get_return_type();
+        if !expected_return_type.is_compatible_with(value.get_type()) {
+            // NOTE: due to opaque pointers now being used, we need to cast here.
+            value = self.context.new_cast(None, value, expected_return_type);
+        }
         self.llbb().end_with_return(None, value);
     }
 
@@ -719,17 +736,25 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         unimplemented!();
     }
 
-    fn load(&mut self, pointee_ty: Type<'gcc>, ptr: RValue<'gcc>, _align: Align) -> RValue<'gcc> {
+    fn load(&mut self, pointee_ty: Type<'gcc>, ptr: RValue<'gcc>, align: Align) -> RValue<'gcc> {
         let block = self.llbb();
         let function = block.get_function();
         // NOTE: instead of returning the dereference here, we have to assign it to a variable in
         // the current basic block. Otherwise, it could be used in another basic block, causing a
         // dereference after a drop, for instance.
-        // TODO(antoyo): handle align of the load instruction.
-        let ptr = self.context.new_cast(None, ptr, pointee_ty.make_pointer());
+        // FIXME(antoyo): this checks that we don't call get_aligned() a second time on a type.
+        // Ideally, we shouldn't need to do this check.
+        let aligned_type =
+            if pointee_ty == self.cx.u128_type || pointee_ty == self.cx.i128_type {
+                pointee_ty
+            }
+            else {
+                pointee_ty.get_aligned(align.bytes())
+            };
+        let ptr = self.context.new_cast(None, ptr, aligned_type.make_pointer());
         let deref = ptr.dereference(None).to_rvalue();
         unsafe { RETURN_VALUE_COUNT += 1 };
-        let loaded_value = function.new_local(None, pointee_ty, &format!("loadedValue{}", unsafe { RETURN_VALUE_COUNT }));
+        let loaded_value = function.new_local(None, aligned_type, &format!("loadedValue{}", unsafe { RETURN_VALUE_COUNT }));
         block.add_assignment(None, loaded_value, deref);
         loaded_value.to_rvalue()
     }
@@ -909,7 +934,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         self.context.new_bitcast(None, result, ptr_type)
     }
 
-    fn inbounds_gep(&mut self, _typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
+    fn inbounds_gep(&mut self, typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> {
+        // NOTE: due to opaque pointers now being used, we need to cast here.
+        let ptr = self.context.new_cast(None, ptr, typ.make_pointer());
         // NOTE: array indexing is always considered in bounds in GCC (TODO(antoyo): to be verified).
         let mut indices = indices.into_iter();
         let index = indices.next().expect("first index in inbounds_gep");
@@ -938,6 +965,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
             element.get_address(None)
         }
         else if let Some(struct_type) = value_type.is_struct() {
+            // NOTE: due to opaque pointers now being used, we need to bitcast here.
+            let ptr = self.bitcast_if_needed(ptr, value_type.make_pointer());
             ptr.dereference_field(None, struct_type.get_field(idx as i32)).get_address(None)
         }
         else {
@@ -1356,7 +1385,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
     fn call(
         &mut self,
-        _typ: Type<'gcc>,
+        typ: Type<'gcc>,
         _fn_attrs: Option<&CodegenFnAttrs>,
         fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>,
         func: RValue<'gcc>,
@@ -1370,7 +1399,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         }
         else {
            // If it's not a function that was defined, it's a function pointer.
-            self.function_ptr_call(func, args, funclet)
+            self.function_ptr_call(typ, func, args, funclet)
         };
         if let Some(_fn_abi) = fn_abi {
             // TODO(bjorn3): Apply function attributes
@@ -1843,7 +1872,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
         #[cfg(feature="master")]
         let (cond, element_type) = {
-            let then_val_vector_type = then_val.get_type().dyncast_vector().expect("vector type");
+            // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+            let then_val_vector_type = then_val.get_type().unqualified().dyncast_vector().expect("vector type");
             let then_val_element_type = then_val_vector_type.get_element_type();
             let then_val_element_size = then_val_element_type.get_size();
 
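Aside: several builder.rs hunks above repeat one pattern introduced by rustc's switch to opaque pointers: the pointee or function type can no longer be recovered from the pointer value itself, so the value is cast to the type the caller passes in before it is called through, returned, or indexed. A minimal sketch of that pattern, using only gccjit calls already present in the hunks above (the helper name is invented for illustration):

    // Hypothetical helper: cast `value` to `expected` only when the types are
    // not already compatible, as done in function_ptr_call and ret above.
    fn cast_if_needed<'gcc>(
        context: &gccjit::Context<'gcc>,
        value: gccjit::RValue<'gcc>,
        expected: gccjit::Type<'gcc>,
    ) -> gccjit::RValue<'gcc> {
        if expected.is_compatible_with(value.get_type()) {
            value
        } else {
            context.new_cast(None, value, expected)
        }
    }
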
diff --git a/compiler/rustc_codegen_gcc/src/consts.rs b/compiler/rustc_codegen_gcc/src/consts.rs
index 33e3b0baa92..d8a1fd315c0 100644
--- a/compiler/rustc_codegen_gcc/src/consts.rs
+++ b/compiler/rustc_codegen_gcc/src/consts.rs
@@ -1,5 +1,5 @@
 #[cfg(feature = "master")]
-use gccjit::FnAttribute;
+use gccjit::{FnAttribute, VarAttribute, Visibility};
 use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue};
 use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, DerivedTypeMethods, StaticMethods};
 use rustc_middle::span_bug;
@@ -234,7 +234,8 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             );
 
             if !self.tcx.is_reachable_non_generic(def_id) {
-                // TODO(antoyo): set visibility.
+                #[cfg(feature = "master")]
+                global.add_attribute(VarAttribute::Visibility(Visibility::Hidden));
             }
 
             global
diff --git a/compiler/rustc_codegen_gcc/src/declare.rs b/compiler/rustc_codegen_gcc/src/declare.rs
index 4748e7e4be2..493626c3cf5 100644
--- a/compiler/rustc_codegen_gcc/src/declare.rs
+++ b/compiler/rustc_codegen_gcc/src/declare.rs
@@ -132,7 +132,7 @@ fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*ll
 pub fn mangle_name(name: &str) -> String {
     name.replace(|char: char| {
         if !char.is_alphanumeric() && char != '_' {
-            debug_assert!("$.".contains(char), "Unsupported char in function name: {}", char);
+            debug_assert!("$.*".contains(char), "Unsupported char in function name {}: {}", name, char);
             true
         }
         else {
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
index 8a4559355ea..438eab78943 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
@@ -2967,10 +2967,6 @@ match name {
     "llvm.nvvm.clz.ll" => "__nvvm_clz_ll",
     "llvm.nvvm.cos.approx.f" => "__nvvm_cos_approx_f",
     "llvm.nvvm.cos.approx.ftz.f" => "__nvvm_cos_approx_ftz_f",
-    "llvm.nvvm.cp.async.ca.shared.global.16" => "__nvvm_cp_async_ca_shared_global_16",
-    "llvm.nvvm.cp.async.ca.shared.global.4" => "__nvvm_cp_async_ca_shared_global_4",
-    "llvm.nvvm.cp.async.ca.shared.global.8" => "__nvvm_cp_async_ca_shared_global_8",
-    "llvm.nvvm.cp.async.cg.shared.global.16" => "__nvvm_cp_async_cg_shared_global_16",
     "llvm.nvvm.cp.async.commit.group" => "__nvvm_cp_async_commit_group",
     "llvm.nvvm.cp.async.mbarrier.arrive" => "__nvvm_cp_async_mbarrier_arrive",
     "llvm.nvvm.cp.async.mbarrier.arrive.noinc" => "__nvvm_cp_async_mbarrier_arrive_noinc",
@@ -3086,18 +3082,8 @@ match name {
     "llvm.nvvm.fma.rn.f16" => "__nvvm_fma_rn_f16",
     "llvm.nvvm.fma.rn.f16x2" => "__nvvm_fma_rn_f16x2",
     "llvm.nvvm.fma.rn.ftz.f" => "__nvvm_fma_rn_ftz_f",
-    "llvm.nvvm.fma.rn.ftz.f16" => "__nvvm_fma_rn_ftz_f16",
-    "llvm.nvvm.fma.rn.ftz.f16x2" => "__nvvm_fma_rn_ftz_f16x2",
-    "llvm.nvvm.fma.rn.ftz.relu.f16" => "__nvvm_fma_rn_ftz_relu_f16",
-    "llvm.nvvm.fma.rn.ftz.relu.f16x2" => "__nvvm_fma_rn_ftz_relu_f16x2",
-    "llvm.nvvm.fma.rn.ftz.sat.f16" => "__nvvm_fma_rn_ftz_sat_f16",
-    "llvm.nvvm.fma.rn.ftz.sat.f16x2" => "__nvvm_fma_rn_ftz_sat_f16x2",
     "llvm.nvvm.fma.rn.relu.bf16" => "__nvvm_fma_rn_relu_bf16",
     "llvm.nvvm.fma.rn.relu.bf16x2" => "__nvvm_fma_rn_relu_bf16x2",
-    "llvm.nvvm.fma.rn.relu.f16" => "__nvvm_fma_rn_relu_f16",
-    "llvm.nvvm.fma.rn.relu.f16x2" => "__nvvm_fma_rn_relu_f16x2",
-    "llvm.nvvm.fma.rn.sat.f16" => "__nvvm_fma_rn_sat_f16",
-    "llvm.nvvm.fma.rn.sat.f16x2" => "__nvvm_fma_rn_sat_f16x2",
     "llvm.nvvm.fma.rp.d" => "__nvvm_fma_rp_d",
     "llvm.nvvm.fma.rp.f" => "__nvvm_fma_rp_f",
     "llvm.nvvm.fma.rp.ftz.f" => "__nvvm_fma_rp_ftz_f",
@@ -3111,32 +3097,18 @@ match name {
     "llvm.nvvm.fmax.f16" => "__nvvm_fmax_f16",
     "llvm.nvvm.fmax.f16x2" => "__nvvm_fmax_f16x2",
     "llvm.nvvm.fmax.ftz.f" => "__nvvm_fmax_ftz_f",
-    "llvm.nvvm.fmax.ftz.f16" => "__nvvm_fmax_ftz_f16",
-    "llvm.nvvm.fmax.ftz.f16x2" => "__nvvm_fmax_ftz_f16x2",
     "llvm.nvvm.fmax.ftz.nan.f" => "__nvvm_fmax_ftz_nan_f",
-    "llvm.nvvm.fmax.ftz.nan.f16" => "__nvvm_fmax_ftz_nan_f16",
-    "llvm.nvvm.fmax.ftz.nan.f16x2" => "__nvvm_fmax_ftz_nan_f16x2",
     "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f" => "__nvvm_fmax_ftz_nan_xorsign_abs_f",
-    "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16" => "__nvvm_fmax_ftz_nan_xorsign_abs_f16",
-    "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16x2" => "__nvvm_fmax_ftz_nan_xorsign_abs_f16x2",
     "llvm.nvvm.fmax.ftz.xorsign.abs.f" => "__nvvm_fmax_ftz_xorsign_abs_f",
-    "llvm.nvvm.fmax.ftz.xorsign.abs.f16" => "__nvvm_fmax_ftz_xorsign_abs_f16",
-    "llvm.nvvm.fmax.ftz.xorsign.abs.f16x2" => "__nvvm_fmax_ftz_xorsign_abs_f16x2",
     "llvm.nvvm.fmax.nan.bf16" => "__nvvm_fmax_nan_bf16",
     "llvm.nvvm.fmax.nan.bf16x2" => "__nvvm_fmax_nan_bf16x2",
     "llvm.nvvm.fmax.nan.f" => "__nvvm_fmax_nan_f",
-    "llvm.nvvm.fmax.nan.f16" => "__nvvm_fmax_nan_f16",
-    "llvm.nvvm.fmax.nan.f16x2" => "__nvvm_fmax_nan_f16x2",
     "llvm.nvvm.fmax.nan.xorsign.abs.bf16" => "__nvvm_fmax_nan_xorsign_abs_bf16",
     "llvm.nvvm.fmax.nan.xorsign.abs.bf16x2" => "__nvvm_fmax_nan_xorsign_abs_bf16x2",
     "llvm.nvvm.fmax.nan.xorsign.abs.f" => "__nvvm_fmax_nan_xorsign_abs_f",
-    "llvm.nvvm.fmax.nan.xorsign.abs.f16" => "__nvvm_fmax_nan_xorsign_abs_f16",
-    "llvm.nvvm.fmax.nan.xorsign.abs.f16x2" => "__nvvm_fmax_nan_xorsign_abs_f16x2",
     "llvm.nvvm.fmax.xorsign.abs.bf16" => "__nvvm_fmax_xorsign_abs_bf16",
     "llvm.nvvm.fmax.xorsign.abs.bf16x2" => "__nvvm_fmax_xorsign_abs_bf16x2",
     "llvm.nvvm.fmax.xorsign.abs.f" => "__nvvm_fmax_xorsign_abs_f",
-    "llvm.nvvm.fmax.xorsign.abs.f16" => "__nvvm_fmax_xorsign_abs_f16",
-    "llvm.nvvm.fmax.xorsign.abs.f16x2" => "__nvvm_fmax_xorsign_abs_f16x2",
     "llvm.nvvm.fmin.bf16" => "__nvvm_fmin_bf16",
     "llvm.nvvm.fmin.bf16x2" => "__nvvm_fmin_bf16x2",
     "llvm.nvvm.fmin.d" => "__nvvm_fmin_d",
@@ -3144,32 +3116,18 @@ match name {
     "llvm.nvvm.fmin.f16" => "__nvvm_fmin_f16",
     "llvm.nvvm.fmin.f16x2" => "__nvvm_fmin_f16x2",
     "llvm.nvvm.fmin.ftz.f" => "__nvvm_fmin_ftz_f",
-    "llvm.nvvm.fmin.ftz.f16" => "__nvvm_fmin_ftz_f16",
-    "llvm.nvvm.fmin.ftz.f16x2" => "__nvvm_fmin_ftz_f16x2",
     "llvm.nvvm.fmin.ftz.nan.f" => "__nvvm_fmin_ftz_nan_f",
-    "llvm.nvvm.fmin.ftz.nan.f16" => "__nvvm_fmin_ftz_nan_f16",
-    "llvm.nvvm.fmin.ftz.nan.f16x2" => "__nvvm_fmin_ftz_nan_f16x2",
     "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f" => "__nvvm_fmin_ftz_nan_xorsign_abs_f",
-    "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16" => "__nvvm_fmin_ftz_nan_xorsign_abs_f16",
-    "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16x2" => "__nvvm_fmin_ftz_nan_xorsign_abs_f16x2",
     "llvm.nvvm.fmin.ftz.xorsign.abs.f" => "__nvvm_fmin_ftz_xorsign_abs_f",
-    "llvm.nvvm.fmin.ftz.xorsign.abs.f16" => "__nvvm_fmin_ftz_xorsign_abs_f16",
-    "llvm.nvvm.fmin.ftz.xorsign.abs.f16x2" => "__nvvm_fmin_ftz_xorsign_abs_f16x2",
     "llvm.nvvm.fmin.nan.bf16" => "__nvvm_fmin_nan_bf16",
     "llvm.nvvm.fmin.nan.bf16x2" => "__nvvm_fmin_nan_bf16x2",
     "llvm.nvvm.fmin.nan.f" => "__nvvm_fmin_nan_f",
-    "llvm.nvvm.fmin.nan.f16" => "__nvvm_fmin_nan_f16",
-    "llvm.nvvm.fmin.nan.f16x2" => "__nvvm_fmin_nan_f16x2",
     "llvm.nvvm.fmin.nan.xorsign.abs.bf16" => "__nvvm_fmin_nan_xorsign_abs_bf16",
     "llvm.nvvm.fmin.nan.xorsign.abs.bf16x2" => "__nvvm_fmin_nan_xorsign_abs_bf16x2",
     "llvm.nvvm.fmin.nan.xorsign.abs.f" => "__nvvm_fmin_nan_xorsign_abs_f",
-    "llvm.nvvm.fmin.nan.xorsign.abs.f16" => "__nvvm_fmin_nan_xorsign_abs_f16",
-    "llvm.nvvm.fmin.nan.xorsign.abs.f16x2" => "__nvvm_fmin_nan_xorsign_abs_f16x2",
     "llvm.nvvm.fmin.xorsign.abs.bf16" => "__nvvm_fmin_xorsign_abs_bf16",
     "llvm.nvvm.fmin.xorsign.abs.bf16x2" => "__nvvm_fmin_xorsign_abs_bf16x2",
     "llvm.nvvm.fmin.xorsign.abs.f" => "__nvvm_fmin_xorsign_abs_f",
-    "llvm.nvvm.fmin.xorsign.abs.f16" => "__nvvm_fmin_xorsign_abs_f16",
-    "llvm.nvvm.fmin.xorsign.abs.f16x2" => "__nvvm_fmin_xorsign_abs_f16x2",
     "llvm.nvvm.fns" => "__nvvm_fns",
     "llvm.nvvm.h2f" => "__nvvm_h2f",
     "llvm.nvvm.i2d.rm" => "__nvvm_i2d_rm",
@@ -7895,6 +7853,10 @@ match name {
     "llvm.x86.subborrow.u64" => "__builtin_ia32_subborrow_u64",
     "llvm.x86.tbm.bextri.u32" => "__builtin_ia32_bextri_u32",
     "llvm.x86.tbm.bextri.u64" => "__builtin_ia32_bextri_u64",
+    "llvm.x86.tcmmimfp16ps" => "__builtin_ia32_tcmmimfp16ps",
+    "llvm.x86.tcmmimfp16ps.internal" => "__builtin_ia32_tcmmimfp16ps_internal",
+    "llvm.x86.tcmmrlfp16ps" => "__builtin_ia32_tcmmrlfp16ps",
+    "llvm.x86.tcmmrlfp16ps.internal" => "__builtin_ia32_tcmmrlfp16ps_internal",
     "llvm.x86.tdpbf16ps" => "__builtin_ia32_tdpbf16ps",
     "llvm.x86.tdpbf16ps.internal" => "__builtin_ia32_tdpbf16ps_internal",
     "llvm.x86.tdpbssd" => "__builtin_ia32_tdpbssd",
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
index 0edec566be3..f28348380d7 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs
@@ -313,6 +313,13 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
                 let new_args = args.to_vec();
                 args = vec![new_args[1], new_args[0], new_args[2], new_args[3], new_args[4]].into();
             },
+            "__builtin_ia32_vpshrdv_v8di" | "__builtin_ia32_vpshrdv_v4di" | "__builtin_ia32_vpshrdv_v2di" |
+                "__builtin_ia32_vpshrdv_v16si" | "__builtin_ia32_vpshrdv_v8si" | "__builtin_ia32_vpshrdv_v4si" |
+                "__builtin_ia32_vpshrdv_v32hi" | "__builtin_ia32_vpshrdv_v16hi" | "__builtin_ia32_vpshrdv_v8hi" => {
+                // The first two arguments are reversed, compared to LLVM.
+                let new_args = args.to_vec();
+                args = vec![new_args[1], new_args[0], new_args[2]].into();
+            },
             _ => (),
         }
     }
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index 60176874747..a31fee39918 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -551,141 +551,52 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let context = &self.cx.context;
         let result =
             match width {
-                8 => {
-                    // First step.
-                    let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
-                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
-                    let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
-                    let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
-                    let step1 = self.or(left, right);
-
-                    // Second step.
-                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
-                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
-                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
-                    let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
-                    let step2 = self.or(left, right);
-
-                    // Third step.
-                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
-                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
-                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
-                    let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
-                    let step3 = self.or(left, right);
-
-                    step3
-                },
-                16 => {
-                    // First step.
-                    let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
-                    let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
-                    let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
-                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
-                    let step1 = self.or(left, right);
-
-                    // Second step.
-                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
-                    let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
-                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
-                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
-                    let step2 = self.or(left, right);
-
-                    // Third step.
-                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
-                    let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
-                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
-                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
-                    let step3 = self.or(left, right);
-
-                    // Fourth step.
-                    let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
-                    let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
-                    let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
-                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
-                    let step4 = self.or(left, right);
+                8 | 16 | 32 | 64 => {
+                    let mask = ((1u128 << width) - 1) as u64;
+                    let (m0, m1, m2) = if width > 16 {
+                        (
+                            context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64),
+                            context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64),
+                            context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64),
+                        )
+                    } else {
+                        (
+                            context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32),
+                            context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32),
+                            context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32),
+                        )
+                    };
+                    let one = context.new_rvalue_from_int(typ, 1);
+                    let two = context.new_rvalue_from_int(typ, 2);
+                    let four = context.new_rvalue_from_int(typ, 4);
 
-                    step4
-                },
-                32 => {
-                    // TODO(antoyo): Refactor with other implementations.
                     // First step.
-                    let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
-                    let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
+                    let left = self.lshr(value, one);
+                    let left = self.and(left, m0);
+                    let right = self.and(value, m0);
+                    let right = self.shl(right, one);
                     let step1 = self.or(left, right);
 
                     // Second step.
-                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
-                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
+                    let left = self.lshr(step1, two);
+                    let left = self.and(left, m1);
+                    let right = self.and(step1, m1);
+                    let right = self.shl(right, two);
                     let step2 = self.or(left, right);
 
                     // Third step.
-                    let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
-                    let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
+                    let left = self.lshr(step2, four);
+                    let left = self.and(left, m2);
+                    let right = self.and(step2, m2);
+                    let right = self.shl(right, four);
                     let step3 = self.or(left, right);
 
                     // Fourth step.
-                    let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
-                    let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
-                    let step4 = self.or(left, right);
-
-                    // Fifth step.
-                    let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
-                    let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
-                    let step5 = self.or(left, right);
-
-                    step5
-                },
-                64 => {
-                    // First step.
-                    let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
-                    let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
-                    let step1 = self.or(left, right);
-
-                    // Second step.
-                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
-                    let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
-                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
-                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
-                    let step2 = self.or(left, right);
-
-                    // Third step.
-                    let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
-                    let left = self.xor(step2, left);
-                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));
-
-                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
-                    let left = self.or(temp, left);
-                    let step3 = self.xor(left, step2);
-
-                    // Fourth step.
-                    let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
-                    let left = self.xor(step3, left);
-                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));
-
-                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
-                    let left = self.or(temp, left);
-                    let step4 = self.xor(left, step3);
-
-                    // Fifth step.
-                    let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
-                    let left = self.xor(step4, left);
-                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));
-
-                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
-                    let left = self.or(temp, left);
-                    let step5 = self.xor(left, step4);
-
-                    step5
+                    if width == 8 {
+                        step3
+                    } else {
+                        self.gcc_bswap(step3, width)
+                    }
                 },
                 128 => {
                     // TODO(antoyo): find a more efficient implementation?
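Aside: the rewritten bitreverse hunk above folds the previous per-width code paths into one mask-and-shift sequence: swap adjacent bits, then bit pairs, then nibbles, which reverses the bit order inside every byte; for widths above 8, a byte swap (gcc_bswap) completes the reversal. A minimal plain-Rust sketch of the same algorithm (illustrative only, not the emitted GCC IR):

    // Reverse the low `width` bits of `value`; width must be 8, 16, 32 or 64.
    fn bit_reverse(value: u64, width: u32) -> u64 {
        assert!(matches!(width, 8 | 16 | 32 | 64));
        let mask = if width == 64 { u64::MAX } else { (1u64 << width) - 1 };
        let mut v = value & mask;
        // Swap adjacent bits, then pairs of bits, then nibbles: after this,
        // the bits within each byte are reversed.
        v = ((v >> 1) & (0x5555_5555_5555_5555 & mask)) | ((v & (0x5555_5555_5555_5555 & mask)) << 1);
        v = ((v >> 2) & (0x3333_3333_3333_3333 & mask)) | ((v & (0x3333_3333_3333_3333 & mask)) << 2);
        v = ((v >> 4) & (0x0f0f_0f0f_0f0f_0f0f & mask)) | ((v & (0x0f0f_0f0f_0f0f_0f0f & mask)) << 4);
        // For multi-byte widths, reversing the byte order finishes the job.
        if width == 8 { v } else { (v.swap_bytes() >> (64 - width)) & mask }
    }
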
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
index b59c3a64f57..9115cf97119 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -165,10 +165,15 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             InvalidMonomorphizationReturnIntegerType { span, name, ret_ty, out_ty }
         );
 
+        let arg1 = args[0].immediate();
+        // NOTE: we get different vector types for the same vector type and libgccjit doesn't
+        // compare them as equal, so bitcast.
+        // FIXME(antoyo): allow comparing vector types as equal in libgccjit.
+        let arg2 = bx.context.new_bitcast(None, args[1].immediate(), arg1.get_type());
         return Ok(compare_simd_types(
             bx,
-            args[0].immediate(),
-            args[1].immediate(),
+            arg1,
+            arg2,
             in_elem,
             llret_ty,
             cmp_op,
@@ -341,7 +346,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         // endian and MSB-first for big endian.
 
         let vector = args[0].immediate();
-        let vector_type = vector.get_type().dyncast_vector().expect("vector type");
+        // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+        let vector_type = vector.get_type().unqualified().dyncast_vector().expect("vector type");
         let elem_type = vector_type.get_element_type();
 
         let expected_int_bits = in_len.max(8);
@@ -848,7 +854,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 (true, true) => {
                     // Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition
                     // TODO(antoyo): improve using conditional operators if possible.
-                    let arg_type = lhs.get_type();
+                    // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+                    let arg_type = lhs.get_type().unqualified();
                     // TODO(antoyo): convert lhs and rhs to unsigned.
                     let sum = lhs + rhs;
                     let vector_type = arg_type.dyncast_vector().expect("vector type");
@@ -878,7 +885,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                     res & cmp
                 },
                 (true, false) => {
-                    let arg_type = lhs.get_type();
+                    // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+                    let arg_type = lhs.get_type().unqualified();
                     // TODO(antoyo): this uses the same algorithm from saturating add, but add the
                     // negative of the right operand. Find a proper subtraction algorithm.
                     let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index ea013c4428c..2a6b642782d 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -111,6 +111,8 @@ impl CodegenBackend for GccCodegenBackend {
     }
 
     fn init(&self, sess: &Session) {
+        #[cfg(feature="master")]
+        gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
         if sess.lto() != Lto::No {
             sess.emit_warning(LTONotSupported {});
         }
diff --git a/compiler/rustc_codegen_gcc/src/type_of.rs b/compiler/rustc_codegen_gcc/src/type_of.rs
index 30a3fe67b85..74f016cf90a 100644
--- a/compiler/rustc_codegen_gcc/src/type_of.rs
+++ b/compiler/rustc_codegen_gcc/src/type_of.rs
@@ -383,8 +383,8 @@ impl<'gcc, 'tcx> LayoutTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         unimplemented!();
     }
 
-    fn fn_decl_backend_type(&self, _fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> Type<'gcc> {
-        // FIXME(antoyo): return correct type.
-        self.type_void()
+    fn fn_decl_backend_type(&self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> Type<'gcc> {
+        let (return_type, param_types, variadic, _) = fn_abi.gcc_type(self);
+        self.context.new_function_pointer_type(None, return_type, &param_types, variadic)
     }
 }