Diffstat (limited to 'compiler/rustc_codegen_gcc/src/intrinsic')
 -rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/archs.rs |   27
 -rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs  |  524
 -rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/mod.rs   | 1101
 -rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/simd.rs  |  230
4 files changed, 1164 insertions(+), 718 deletions(-)
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
index 15d67385c3e..c4ae1751fa0 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
@@ -151,8 +151,10 @@ match name {
     "llvm.amdgcn.msad.u8" => "__builtin_amdgcn_msad_u8",
     "llvm.amdgcn.perm" => "__builtin_amdgcn_perm",
     "llvm.amdgcn.permlane16" => "__builtin_amdgcn_permlane16",
+    "llvm.amdgcn.permlane16.var" => "__builtin_amdgcn_permlane16_var",
     "llvm.amdgcn.permlane64" => "__builtin_amdgcn_permlane64",
     "llvm.amdgcn.permlanex16" => "__builtin_amdgcn_permlanex16",
+    "llvm.amdgcn.permlanex16.var" => "__builtin_amdgcn_permlanex16_var",
     "llvm.amdgcn.qsad.pk.u16.u8" => "__builtin_amdgcn_qsad_pk_u16_u8",
     "llvm.amdgcn.queue.ptr" => "__builtin_amdgcn_queue_ptr",
     "llvm.amdgcn.rcp.legacy" => "__builtin_amdgcn_rcp_legacy",
@@ -160,11 +162,20 @@ match name {
     "llvm.amdgcn.readlane" => "__builtin_amdgcn_readlane",
     "llvm.amdgcn.rsq.legacy" => "__builtin_amdgcn_rsq_legacy",
     "llvm.amdgcn.s.barrier" => "__builtin_amdgcn_s_barrier",
+    "llvm.amdgcn.s.barrier.init" => "__builtin_amdgcn_s_barrier_init",
+    "llvm.amdgcn.s.barrier.join" => "__builtin_amdgcn_s_barrier_join",
+    "llvm.amdgcn.s.barrier.leave" => "__builtin_amdgcn_s_barrier_leave",
+    "llvm.amdgcn.s.barrier.signal" => "__builtin_amdgcn_s_barrier_signal",
+    "llvm.amdgcn.s.barrier.signal.isfirst" => "__builtin_amdgcn_s_barrier_signal_isfirst",
+    "llvm.amdgcn.s.barrier.signal.isfirst.var" => "__builtin_amdgcn_s_barrier_signal_isfirst_var",
+    "llvm.amdgcn.s.barrier.signal.var" => "__builtin_amdgcn_s_barrier_signal_var",
+    "llvm.amdgcn.s.barrier.wait" => "__builtin_amdgcn_s_barrier_wait",
     "llvm.amdgcn.s.dcache.inv" => "__builtin_amdgcn_s_dcache_inv",
     "llvm.amdgcn.s.dcache.inv.vol" => "__builtin_amdgcn_s_dcache_inv_vol",
     "llvm.amdgcn.s.dcache.wb" => "__builtin_amdgcn_s_dcache_wb",
     "llvm.amdgcn.s.dcache.wb.vol" => "__builtin_amdgcn_s_dcache_wb_vol",
     "llvm.amdgcn.s.decperflevel" => "__builtin_amdgcn_s_decperflevel",
+    "llvm.amdgcn.s.get.barrier.state" => "__builtin_amdgcn_s_get_barrier_state",
     "llvm.amdgcn.s.get.waveid.in.workgroup" => "__builtin_amdgcn_s_get_waveid_in_workgroup",
     "llvm.amdgcn.s.getpc" => "__builtin_amdgcn_s_getpc",
     "llvm.amdgcn.s.getreg" => "__builtin_amdgcn_s_getreg",
@@ -176,8 +187,10 @@ match name {
     "llvm.amdgcn.s.setprio" => "__builtin_amdgcn_s_setprio",
     "llvm.amdgcn.s.setreg" => "__builtin_amdgcn_s_setreg",
     "llvm.amdgcn.s.sleep" => "__builtin_amdgcn_s_sleep",
+    "llvm.amdgcn.s.sleep.var" => "__builtin_amdgcn_s_sleep_var",
     "llvm.amdgcn.s.wait.event.export.ready" => "__builtin_amdgcn_s_wait_event_export_ready",
     "llvm.amdgcn.s.waitcnt" => "__builtin_amdgcn_s_waitcnt",
+    "llvm.amdgcn.s.wakeup.barrier" => "__builtin_amdgcn_s_wakeup_barrier",
     "llvm.amdgcn.sad.hi.u8" => "__builtin_amdgcn_sad_hi_u8",
     "llvm.amdgcn.sad.u16" => "__builtin_amdgcn_sad_u16",
     "llvm.amdgcn.sad.u8" => "__builtin_amdgcn_sad_u8",
@@ -314,6 +327,8 @@ match name {
     // bpf
     "llvm.bpf.btf.type.id" => "__builtin_bpf_btf_type_id",
     "llvm.bpf.compare" => "__builtin_bpf_compare",
+    "llvm.bpf.getelementptr.and.load" => "__builtin_bpf_getelementptr_and_load",
+    "llvm.bpf.getelementptr.and.store" => "__builtin_bpf_getelementptr_and_store",
     "llvm.bpf.load.byte" => "__builtin_bpf_load_byte",
     "llvm.bpf.load.half" => "__builtin_bpf_load_half",
     "llvm.bpf.load.word" => "__builtin_bpf_load_word",
@@ -5776,14 +5791,6 @@ match name {
     "llvm.s390.verimf" => "__builtin_s390_verimf",
     "llvm.s390.verimg" => "__builtin_s390_verimg",
     "llvm.s390.verimh" => "__builtin_s390_verimh",
-    "llvm.s390.verllb" => "__builtin_s390_verllb",
-    "llvm.s390.verllf" => "__builtin_s390_verllf",
-    "llvm.s390.verllg" => "__builtin_s390_verllg",
-    "llvm.s390.verllh" => "__builtin_s390_verllh",
-    "llvm.s390.verllvb" => "__builtin_s390_verllvb",
-    "llvm.s390.verllvf" => "__builtin_s390_verllvf",
-    "llvm.s390.verllvg" => "__builtin_s390_verllvg",
-    "llvm.s390.verllvh" => "__builtin_s390_verllvh",
     "llvm.s390.vfaeb" => "__builtin_s390_vfaeb",
     "llvm.s390.vfaef" => "__builtin_s390_vfaef",
     "llvm.s390.vfaeh" => "__builtin_s390_vfaeh",
@@ -5815,7 +5822,7 @@ match name {
     "llvm.s390.vistrh" => "__builtin_s390_vistrh",
     "llvm.s390.vlbb" => "__builtin_s390_vlbb",
     "llvm.s390.vll" => "__builtin_s390_vll",
-    "llvm.s390.vlrl" => "__builtin_s390_vlrl",
+    "llvm.s390.vlrl" => "__builtin_s390_vlrlr",
     "llvm.s390.vmaeb" => "__builtin_s390_vmaeb",
     "llvm.s390.vmaef" => "__builtin_s390_vmaef",
     "llvm.s390.vmaeh" => "__builtin_s390_vmaeh",
@@ -5885,7 +5892,7 @@ match name {
     "llvm.s390.vstrczb" => "__builtin_s390_vstrczb",
     "llvm.s390.vstrczf" => "__builtin_s390_vstrczf",
     "llvm.s390.vstrczh" => "__builtin_s390_vstrczh",
-    "llvm.s390.vstrl" => "__builtin_s390_vstrl",
+    "llvm.s390.vstrl" => "__builtin_s390_vstrlr",
     "llvm.s390.vsumb" => "__builtin_s390_vsumb",
     "llvm.s390.vsumgf" => "__builtin_s390_vsumgf",
     "llvm.s390.vsumgh" => "__builtin_s390_vsumgh",
- "__builtin_ia32_prold512_mask" | "__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask" - | "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask" - | "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" - | "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask" - | "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" - | "__builtin_ia32_prolq512_mask" | "__builtin_ia32_prorq512_mask" | "__builtin_ia32_pslldi512_mask" - | "__builtin_ia32_psrldi512_mask" | "__builtin_ia32_psllqi512_mask" | "__builtin_ia32_psrlqi512_mask" - | "__builtin_ia32_pslld512_mask" | "__builtin_ia32_psrld512_mask" | "__builtin_ia32_psllq512_mask" - | "__builtin_ia32_psrlq512_mask" | "__builtin_ia32_psrad512_mask" | "__builtin_ia32_psraq512_mask" - | "__builtin_ia32_psradi512_mask" | "__builtin_ia32_psraqi512_mask" | "__builtin_ia32_psrav16si_mask" - | "__builtin_ia32_psrav8di_mask" | "__builtin_ia32_prolvd512_mask" | "__builtin_ia32_prorvd512_mask" - | "__builtin_ia32_prolvq512_mask" | "__builtin_ia32_prorvq512_mask" | "__builtin_ia32_psllv16si_mask" - | "__builtin_ia32_psrlv16si_mask" | "__builtin_ia32_psllv8di_mask" | "__builtin_ia32_psrlv8di_mask" - | "__builtin_ia32_permvarsi512_mask" | "__builtin_ia32_vpermilvarps512_mask" - | "__builtin_ia32_vpermilvarpd512_mask" | "__builtin_ia32_permvardi512_mask" - | "__builtin_ia32_permvarsf512_mask" | "__builtin_ia32_permvarqi512_mask" - | "__builtin_ia32_permvarqi256_mask" | "__builtin_ia32_permvarqi128_mask" - | "__builtin_ia32_vpmultishiftqb512_mask" | "__builtin_ia32_vpmultishiftqb256_mask" - | "__builtin_ia32_vpmultishiftqb128_mask" - => { + "__builtin_ia32_prold512_mask" + | "__builtin_ia32_pmuldq512_mask" + | "__builtin_ia32_pmuludq512_mask" + | "__builtin_ia32_pmaxsd512_mask" + | "__builtin_ia32_pmaxsq512_mask" + | "__builtin_ia32_pmaxsq256_mask" + | "__builtin_ia32_pmaxsq128_mask" + | "__builtin_ia32_pmaxud512_mask" + | "__builtin_ia32_pmaxuq512_mask" + | "__builtin_ia32_pminsd512_mask" + | "__builtin_ia32_pminsq512_mask" + | "__builtin_ia32_pminsq256_mask" + | "__builtin_ia32_pminsq128_mask" + | "__builtin_ia32_pminud512_mask" + | "__builtin_ia32_pminuq512_mask" + | "__builtin_ia32_prolq512_mask" + | "__builtin_ia32_prorq512_mask" + | "__builtin_ia32_pslldi512_mask" + | "__builtin_ia32_psrldi512_mask" + | "__builtin_ia32_psllqi512_mask" + | "__builtin_ia32_psrlqi512_mask" + | "__builtin_ia32_pslld512_mask" + | "__builtin_ia32_psrld512_mask" + | "__builtin_ia32_psllq512_mask" + | "__builtin_ia32_psrlq512_mask" + | "__builtin_ia32_psrad512_mask" + | "__builtin_ia32_psraq512_mask" + | "__builtin_ia32_psradi512_mask" + | "__builtin_ia32_psraqi512_mask" + | "__builtin_ia32_psrav16si_mask" + | "__builtin_ia32_psrav8di_mask" + | "__builtin_ia32_prolvd512_mask" + | "__builtin_ia32_prorvd512_mask" + | "__builtin_ia32_prolvq512_mask" + | "__builtin_ia32_prorvq512_mask" + | "__builtin_ia32_psllv16si_mask" + | "__builtin_ia32_psrlv16si_mask" + | "__builtin_ia32_psllv8di_mask" + | "__builtin_ia32_psrlv8di_mask" + | "__builtin_ia32_permvarsi512_mask" + | "__builtin_ia32_vpermilvarps512_mask" + | "__builtin_ia32_vpermilvarpd512_mask" + | "__builtin_ia32_permvardi512_mask" + | "__builtin_ia32_permvarsf512_mask" + | "__builtin_ia32_permvarqi512_mask" + | "__builtin_ia32_permvarqi256_mask" + | "__builtin_ia32_permvarqi128_mask" + | "__builtin_ia32_vpmultishiftqb512_mask" + | 
"__builtin_ia32_vpmultishiftqb256_mask" + | "__builtin_ia32_vpmultishiftqb128_mask" => { let mut new_args = args.to_vec(); let arg3_type = gcc_func.get_param_type(2); - let first_arg = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue(); + let first_arg = builder + .current_func() + .new_local(None, arg3_type, "undefined_for_intrinsic") + .to_rvalue(); new_args.push(first_arg); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask" | "__builtin_ia32_pminuq256_mask" - | "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_prold256_mask" | "__builtin_ia32_prold128_mask" - | "__builtin_ia32_prord512_mask" | "__builtin_ia32_prord256_mask" | "__builtin_ia32_prord128_mask" - | "__builtin_ia32_prolq256_mask" | "__builtin_ia32_prolq128_mask" | "__builtin_ia32_prorq256_mask" - | "__builtin_ia32_prorq128_mask" | "__builtin_ia32_psraq256_mask" | "__builtin_ia32_psraq128_mask" - | "__builtin_ia32_psraqi256_mask" | "__builtin_ia32_psraqi128_mask" | "__builtin_ia32_psravq256_mask" - | "__builtin_ia32_psravq128_mask" | "__builtin_ia32_prolvd256_mask" | "__builtin_ia32_prolvd128_mask" - | "__builtin_ia32_prorvd256_mask" | "__builtin_ia32_prorvd128_mask" | "__builtin_ia32_prolvq256_mask" - | "__builtin_ia32_prolvq128_mask" | "__builtin_ia32_prorvq256_mask" | "__builtin_ia32_prorvq128_mask" - | "__builtin_ia32_permvardi256_mask" | "__builtin_ia32_permvardf512_mask" | "__builtin_ia32_permvardf256_mask" - | "__builtin_ia32_pmulhuw512_mask" | "__builtin_ia32_pmulhw512_mask" | "__builtin_ia32_pmulhrsw512_mask" - | "__builtin_ia32_pmaxuw512_mask" | "__builtin_ia32_pmaxub512_mask" | "__builtin_ia32_pmaxsw512_mask" - | "__builtin_ia32_pmaxsb512_mask" | "__builtin_ia32_pminuw512_mask" | "__builtin_ia32_pminub512_mask" - | "__builtin_ia32_pminsw512_mask" | "__builtin_ia32_pminsb512_mask" - | "__builtin_ia32_pmaddwd512_mask" | "__builtin_ia32_pmaddubsw512_mask" | "__builtin_ia32_packssdw512_mask" - | "__builtin_ia32_packsswb512_mask" | "__builtin_ia32_packusdw512_mask" | "__builtin_ia32_packuswb512_mask" - | "__builtin_ia32_pavgw512_mask" | "__builtin_ia32_pavgb512_mask" | "__builtin_ia32_psllw512_mask" - | "__builtin_ia32_psllwi512_mask" | "__builtin_ia32_psllv32hi_mask" | "__builtin_ia32_psrlw512_mask" - | "__builtin_ia32_psrlwi512_mask" | "__builtin_ia32_psllv16hi_mask" | "__builtin_ia32_psllv8hi_mask" - | "__builtin_ia32_psrlv32hi_mask" | "__builtin_ia32_psraw512_mask" | "__builtin_ia32_psrawi512_mask" - | "__builtin_ia32_psrlv16hi_mask" | "__builtin_ia32_psrlv8hi_mask" | "__builtin_ia32_psrav32hi_mask" - | "__builtin_ia32_permvarhi512_mask" | "__builtin_ia32_pshufb512_mask" | "__builtin_ia32_psrav16hi_mask" - | "__builtin_ia32_psrav8hi_mask" | "__builtin_ia32_permvarhi256_mask" | "__builtin_ia32_permvarhi128_mask" - => { + } + "__builtin_ia32_pmaxuq256_mask" + | "__builtin_ia32_pmaxuq128_mask" + | "__builtin_ia32_pminuq256_mask" + | "__builtin_ia32_pminuq128_mask" + | "__builtin_ia32_prold256_mask" + | "__builtin_ia32_prold128_mask" + | "__builtin_ia32_prord512_mask" + | "__builtin_ia32_prord256_mask" + | "__builtin_ia32_prord128_mask" + | "__builtin_ia32_prolq256_mask" + | "__builtin_ia32_prolq128_mask" + | "__builtin_ia32_prorq256_mask" + | "__builtin_ia32_prorq128_mask" + | "__builtin_ia32_psraq256_mask" + | "__builtin_ia32_psraq128_mask" + | "__builtin_ia32_psraqi256_mask" + | 
"__builtin_ia32_psraqi128_mask" + | "__builtin_ia32_psravq256_mask" + | "__builtin_ia32_psravq128_mask" + | "__builtin_ia32_prolvd256_mask" + | "__builtin_ia32_prolvd128_mask" + | "__builtin_ia32_prorvd256_mask" + | "__builtin_ia32_prorvd128_mask" + | "__builtin_ia32_prolvq256_mask" + | "__builtin_ia32_prolvq128_mask" + | "__builtin_ia32_prorvq256_mask" + | "__builtin_ia32_prorvq128_mask" + | "__builtin_ia32_permvardi256_mask" + | "__builtin_ia32_permvardf512_mask" + | "__builtin_ia32_permvardf256_mask" + | "__builtin_ia32_pmulhuw512_mask" + | "__builtin_ia32_pmulhw512_mask" + | "__builtin_ia32_pmulhrsw512_mask" + | "__builtin_ia32_pmaxuw512_mask" + | "__builtin_ia32_pmaxub512_mask" + | "__builtin_ia32_pmaxsw512_mask" + | "__builtin_ia32_pmaxsb512_mask" + | "__builtin_ia32_pminuw512_mask" + | "__builtin_ia32_pminub512_mask" + | "__builtin_ia32_pminsw512_mask" + | "__builtin_ia32_pminsb512_mask" + | "__builtin_ia32_pmaddwd512_mask" + | "__builtin_ia32_pmaddubsw512_mask" + | "__builtin_ia32_packssdw512_mask" + | "__builtin_ia32_packsswb512_mask" + | "__builtin_ia32_packusdw512_mask" + | "__builtin_ia32_packuswb512_mask" + | "__builtin_ia32_pavgw512_mask" + | "__builtin_ia32_pavgb512_mask" + | "__builtin_ia32_psllw512_mask" + | "__builtin_ia32_psllwi512_mask" + | "__builtin_ia32_psllv32hi_mask" + | "__builtin_ia32_psrlw512_mask" + | "__builtin_ia32_psrlwi512_mask" + | "__builtin_ia32_psllv16hi_mask" + | "__builtin_ia32_psllv8hi_mask" + | "__builtin_ia32_psrlv32hi_mask" + | "__builtin_ia32_psraw512_mask" + | "__builtin_ia32_psrawi512_mask" + | "__builtin_ia32_psrlv16hi_mask" + | "__builtin_ia32_psrlv8hi_mask" + | "__builtin_ia32_psrav32hi_mask" + | "__builtin_ia32_permvarhi512_mask" + | "__builtin_ia32_pshufb512_mask" + | "__builtin_ia32_psrav16hi_mask" + | "__builtin_ia32_psrav8hi_mask" + | "__builtin_ia32_permvarhi256_mask" + | "__builtin_ia32_permvarhi128_mask" => { let mut new_args = args.to_vec(); let arg3_type = gcc_func.get_param_type(2); let vector_type = arg3_type.dyncast_vector().expect("vector type"); let zero = builder.context.new_rvalue_zero(vector_type.get_element_type()); let num_units = vector_type.get_num_units(); - let first_arg = builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units]); + let first_arg = + builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units]); new_args.push(first_arg); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_dbpsadbw512_mask" | "__builtin_ia32_dbpsadbw256_mask" | "__builtin_ia32_dbpsadbw128_mask" => { + } + "__builtin_ia32_dbpsadbw512_mask" + | "__builtin_ia32_dbpsadbw256_mask" + | "__builtin_ia32_dbpsadbw128_mask" => { let mut new_args = args.to_vec(); let arg4_type = gcc_func.get_param_type(3); let vector_type = arg4_type.dyncast_vector().expect("vector type"); let zero = builder.context.new_rvalue_zero(vector_type.get_element_type()); let num_units = vector_type.get_num_units(); - let first_arg = builder.context.new_rvalue_from_vector(None, arg4_type, &vec![zero; num_units]); + let first_arg = + builder.context.new_rvalue_from_vector(None, arg4_type, &vec![zero; num_units]); new_args.push(first_arg); let arg5_type = gcc_func.get_param_type(4); let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_vplzcntd_512_mask" | "__builtin_ia32_vplzcntd_256_mask" | 
"__builtin_ia32_vplzcntd_128_mask" - | "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask" => { + } + "__builtin_ia32_vplzcntd_512_mask" + | "__builtin_ia32_vplzcntd_256_mask" + | "__builtin_ia32_vplzcntd_128_mask" + | "__builtin_ia32_vplzcntq_512_mask" + | "__builtin_ia32_vplzcntq_256_mask" + | "__builtin_ia32_vplzcntq_128_mask" => { let mut new_args = args.to_vec(); // Remove last arg as it doesn't seem to be used in GCC and is always false. new_args.pop(); @@ -98,37 +189,45 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let vector_type = arg2_type.dyncast_vector().expect("vector type"); let zero = builder.context.new_rvalue_zero(vector_type.get_element_type()); let num_units = vector_type.get_num_units(); - let first_arg = builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]); + let first_arg = + builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]); new_args.push(first_arg); let arg3_type = gcc_func.get_param_type(2); let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_vpconflictsi_512_mask" | "__builtin_ia32_vpconflictsi_256_mask" - | "__builtin_ia32_vpconflictsi_128_mask" | "__builtin_ia32_vpconflictdi_512_mask" - | "__builtin_ia32_vpconflictdi_256_mask" | "__builtin_ia32_vpconflictdi_128_mask" => { + } + "__builtin_ia32_vpconflictsi_512_mask" + | "__builtin_ia32_vpconflictsi_256_mask" + | "__builtin_ia32_vpconflictsi_128_mask" + | "__builtin_ia32_vpconflictdi_512_mask" + | "__builtin_ia32_vpconflictdi_256_mask" + | "__builtin_ia32_vpconflictdi_128_mask" => { let mut new_args = args.to_vec(); let arg2_type = gcc_func.get_param_type(1); let vector_type = arg2_type.dyncast_vector().expect("vector type"); let zero = builder.context.new_rvalue_zero(vector_type.get_element_type()); let num_units = vector_type.get_num_units(); - let first_arg = builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]); + let first_arg = + builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]); new_args.push(first_arg); let arg3_type = gcc_func.get_param_type(2); let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask" - | "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask" - | "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => { + } + "__builtin_ia32_pternlogd512_mask" + | "__builtin_ia32_pternlogd256_mask" + | "__builtin_ia32_pternlogd128_mask" + | "__builtin_ia32_pternlogq512_mask" + | "__builtin_ia32_pternlogq256_mask" + | "__builtin_ia32_pternlogq128_mask" => { let mut new_args = args.to_vec(); let arg5_type = gcc_func.get_param_type(4); let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1); new_args.push(minus_one); args = new_args.into(); - }, + } "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => { let mut new_args = args.to_vec(); @@ -154,24 +253,33 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc } args = new_args.into(); - }, - "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask" - | "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask" - | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask" - | "__builtin_ia32_divps512_mask" | 
"__builtin_ia32_divpd512_mask" - | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" - | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" => { + } + "__builtin_ia32_addps512_mask" + | "__builtin_ia32_addpd512_mask" + | "__builtin_ia32_subps512_mask" + | "__builtin_ia32_subpd512_mask" + | "__builtin_ia32_mulps512_mask" + | "__builtin_ia32_mulpd512_mask" + | "__builtin_ia32_divps512_mask" + | "__builtin_ia32_divpd512_mask" + | "__builtin_ia32_maxps512_mask" + | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_minps512_mask" + | "__builtin_ia32_minpd512_mask" => { let mut new_args = args.to_vec(); let last_arg = new_args.pop().expect("last arg"); let arg3_type = gcc_func.get_param_type(2); - let undefined = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue(); + let undefined = builder + .current_func() + .new_local(None, arg3_type, "undefined_for_intrinsic") + .to_rvalue(); new_args.push(undefined); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); new_args.push(minus_one); new_args.push(last_arg); args = new_args.into(); - }, + } "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => { let mut new_args = args.to_vec(); let last_arg = new_args.pop().expect("last arg"); @@ -180,54 +288,72 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc new_args.push(minus_one); new_args.push(last_arg); args = new_args.into(); - }, - "__builtin_ia32_vpermi2vard512_mask" | "__builtin_ia32_vpermi2vard256_mask" - | "__builtin_ia32_vpermi2vard128_mask" | "__builtin_ia32_vpermi2varq512_mask" - | "__builtin_ia32_vpermi2varq256_mask" | "__builtin_ia32_vpermi2varq128_mask" - | "__builtin_ia32_vpermi2varps512_mask" | "__builtin_ia32_vpermi2varps256_mask" - | "__builtin_ia32_vpermi2varps128_mask" | "__builtin_ia32_vpermi2varpd512_mask" - | "__builtin_ia32_vpermi2varpd256_mask" | "__builtin_ia32_vpermi2varpd128_mask" | "__builtin_ia32_vpmadd52huq512_mask" - | "__builtin_ia32_vpmadd52luq512_mask" | "__builtin_ia32_vpmadd52huq256_mask" | "__builtin_ia32_vpmadd52luq256_mask" - | "__builtin_ia32_vpmadd52huq128_mask" - => { + } + "__builtin_ia32_vpermi2vard512_mask" + | "__builtin_ia32_vpermi2vard256_mask" + | "__builtin_ia32_vpermi2vard128_mask" + | "__builtin_ia32_vpermi2varq512_mask" + | "__builtin_ia32_vpermi2varq256_mask" + | "__builtin_ia32_vpermi2varq128_mask" + | "__builtin_ia32_vpermi2varps512_mask" + | "__builtin_ia32_vpermi2varps256_mask" + | "__builtin_ia32_vpermi2varps128_mask" + | "__builtin_ia32_vpermi2varpd512_mask" + | "__builtin_ia32_vpermi2varpd256_mask" + | "__builtin_ia32_vpermi2varpd128_mask" + | "__builtin_ia32_vpmadd52huq512_mask" + | "__builtin_ia32_vpmadd52luq512_mask" + | "__builtin_ia32_vpmadd52huq256_mask" + | "__builtin_ia32_vpmadd52luq256_mask" + | "__builtin_ia32_vpmadd52huq128_mask" => { let mut new_args = args.to_vec(); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" - | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => { + } + "__builtin_ia32_cvtdq2ps512_mask" + | "__builtin_ia32_cvtudq2ps512_mask" + | "__builtin_ia32_sqrtps512_mask" + | "__builtin_ia32_sqrtpd512_mask" => { let mut new_args = args.to_vec(); let last_arg = new_args.pop().expect("last arg"); let arg2_type = gcc_func.get_param_type(1); 
- let undefined = builder.current_func().new_local(None, arg2_type, "undefined_for_intrinsic").to_rvalue(); + let undefined = builder + .current_func() + .new_local(None, arg2_type, "undefined_for_intrinsic") + .to_rvalue(); new_args.push(undefined); let arg3_type = gcc_func.get_param_type(2); let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1); new_args.push(minus_one); new_args.push(last_arg); args = new_args.into(); - }, + } "__builtin_ia32_stmxcsr" => { args = vec![].into(); - }, - "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => { + } + "__builtin_ia32_addcarryx_u64" + | "__builtin_ia32_sbb_u64" + | "__builtin_ia32_addcarryx_u32" + | "__builtin_ia32_sbb_u32" => { let mut new_args = args.to_vec(); let arg2_type = gcc_func.get_param_type(1); let variable = builder.current_func().new_local(None, arg2_type, "addcarryResult"); new_args.push(variable.get_address(None)); args = new_args.into(); - }, - "__builtin_ia32_vpermt2varqi512_mask" | "__builtin_ia32_vpermt2varqi256_mask" - | "__builtin_ia32_vpermt2varqi128_mask" | "__builtin_ia32_vpermt2varhi512_mask" - | "__builtin_ia32_vpermt2varhi256_mask" | "__builtin_ia32_vpermt2varhi128_mask" - => { + } + "__builtin_ia32_vpermt2varqi512_mask" + | "__builtin_ia32_vpermt2varqi256_mask" + | "__builtin_ia32_vpermt2varqi128_mask" + | "__builtin_ia32_vpermt2varhi512_mask" + | "__builtin_ia32_vpermt2varhi256_mask" + | "__builtin_ia32_vpermt2varhi128_mask" => { let new_args = args.to_vec(); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); args = vec![new_args[1], new_args[0], new_args[2], minus_one].into(); - }, + } "__builtin_ia32_xrstor" | "__builtin_ia32_xsavec" => { let new_args = args.to_vec(); let thirty_two = builder.context.new_rvalue_from_int(new_args[1].get_type(), 32); @@ -235,22 +361,25 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let arg2_type = gcc_func.get_param_type(1); let arg2 = builder.context.new_cast(None, arg2, arg2_type); args = vec![new_args[0], arg2].into(); - }, + } // These builtins are sent one more argument than needed. "__builtin_prefetch" => { let mut new_args = args.to_vec(); new_args.pop(); args = new_args.into(); - }, + } // The GCC version returns one value of the tuple through a pointer. "__builtin_ia32_rdrand64_step" => { - let arg = builder.current_func().new_local(None, builder.ulonglong_type, "return_rdrand_arg"); + let arg = builder.current_func().new_local( + None, + builder.ulonglong_type, + "return_rdrand_arg", + ); args = vec![arg.get_address(None)].into(); - }, + } _ => (), } - } - else { + } else { match &*func_name { "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => { let new_args = args.to_vec(); @@ -259,10 +388,10 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let arg4_type = gcc_func.get_param_type(3); let arg4 = builder.context.new_bitcast(None, new_args[2], arg4_type); args = vec![new_args[0], new_args[1], arg3, arg4, new_args[3], new_args[5]].into(); - }, + } // NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors. // FIXME: the intrinsics like _mm_mask_fmadd_sd should probably directly call the GCC - // instrinsic to avoid this. + // intrinsic to avoid this. 
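Editor's note: the NOTE just above describes a shape mismatch that the next arms fix by broadcasting each scalar into a vector, calling the vector builtin, and keeping only lane 0. A plain-Rust sketch of the same idea, with arrays standing in for gccjit vector values:

```rust
// Illustrative only: scalar FMA routed through a lane-wise "vector" op.
fn fma_scalar_via_vector(a: f32, b: f32, c: f32) -> f32 {
    // Broadcast each scalar to a 4-lane vector, as the codegen does.
    let (va, vb, vc) = ([a; 4], [b; 4], [c; 4]);
    // The vector builtin computes a lane-wise fused multiply-add.
    let mut result = [0.0f32; 4];
    for i in 0..4 {
        result[i] = va[i].mul_add(vb[i], vc[i]);
    }
    // Only lane 0 is meaningful for the scalar form.
    result[0]
}

fn main() {
    assert_eq!(fma_scalar_via_vector(2.0, 3.0, 4.0), 10.0);
}
```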
"__builtin_ia32_vfmaddss3_round" => { let new_args = args.to_vec(); let arg1_type = gcc_func.get_param_type(0); @@ -272,7 +401,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 4]); let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 4]); args = vec![a, b, c, new_args[3]].into(); - }, + } "__builtin_ia32_vfmaddsd3_round" => { let new_args = args.to_vec(); let arg1_type = gcc_func.get_param_type(0); @@ -282,25 +411,34 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 2]); let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 2]); args = vec![a, b, c, new_args[3]].into(); - }, - "__builtin_ia32_vfmaddsubpd256" | "__builtin_ia32_vfmaddsubps" | "__builtin_ia32_vfmaddsubps256" - | "__builtin_ia32_vfmaddsubpd" => { + } + "__builtin_ia32_vfmaddsubpd256" + | "__builtin_ia32_vfmaddsubps" + | "__builtin_ia32_vfmaddsubps256" + | "__builtin_ia32_vfmaddsubpd" => { if let Some(original_function_name) = original_function_name { match &**original_function_name { - "llvm.x86.fma.vfmsubadd.pd.256" | "llvm.x86.fma.vfmsubadd.ps" | "llvm.x86.fma.vfmsubadd.ps.256" - | "llvm.x86.fma.vfmsubadd.pd" => { + "llvm.x86.fma.vfmsubadd.pd.256" + | "llvm.x86.fma.vfmsubadd.ps" + | "llvm.x86.fma.vfmsubadd.ps.256" + | "llvm.x86.fma.vfmsubadd.pd" => { // NOTE: since both llvm.x86.fma.vfmsubadd.ps and llvm.x86.fma.vfmaddsub.ps maps to // __builtin_ia32_vfmaddsubps, only add minus if this comes from a // subadd LLVM intrinsic, e.g. _mm256_fmsubadd_pd. let mut new_args = args.to_vec(); let arg3 = &mut new_args[2]; - *arg3 = builder.context.new_unary_op(None, UnaryOp::Minus, arg3.get_type(), *arg3); + *arg3 = builder.context.new_unary_op( + None, + UnaryOp::Minus, + arg3.get_type(), + *arg3, + ); args = new_args.into(); - }, + } _ => (), } } - }, + } "__builtin_ia32_ldmxcsr" => { // The builtin __builtin_ia32_ldmxcsr takes an integer value while llvm.x86.sse.ldmxcsr takes a pointer, // so dereference the pointer. 
@@ -309,23 +447,31 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let arg1 = builder.context.new_cast(None, args[0], uint_ptr_type); new_args[0] = arg1.dereference(None).to_rvalue(); args = new_args.into(); - }, - "__builtin_ia32_rcp14sd_mask" | "__builtin_ia32_rcp14ss_mask" | "__builtin_ia32_rsqrt14sd_mask" - | "__builtin_ia32_rsqrt14ss_mask" => { + } + "__builtin_ia32_rcp14sd_mask" + | "__builtin_ia32_rcp14ss_mask" + | "__builtin_ia32_rsqrt14sd_mask" + | "__builtin_ia32_rsqrt14ss_mask" => { let new_args = args.to_vec(); args = vec![new_args[1], new_args[0], new_args[2], new_args[3]].into(); - }, + } "__builtin_ia32_sqrtsd_mask_round" | "__builtin_ia32_sqrtss_mask_round" => { let new_args = args.to_vec(); args = vec![new_args[1], new_args[0], new_args[2], new_args[3], new_args[4]].into(); - }, - "__builtin_ia32_vpshrdv_v8di" | "__builtin_ia32_vpshrdv_v4di" | "__builtin_ia32_vpshrdv_v2di" | - "__builtin_ia32_vpshrdv_v16si" | "__builtin_ia32_vpshrdv_v8si" | "__builtin_ia32_vpshrdv_v4si" | - "__builtin_ia32_vpshrdv_v32hi" | "__builtin_ia32_vpshrdv_v16hi" | "__builtin_ia32_vpshrdv_v8hi" => { + } + "__builtin_ia32_vpshrdv_v8di" + | "__builtin_ia32_vpshrdv_v4di" + | "__builtin_ia32_vpshrdv_v2di" + | "__builtin_ia32_vpshrdv_v16si" + | "__builtin_ia32_vpshrdv_v8si" + | "__builtin_ia32_vpshrdv_v4si" + | "__builtin_ia32_vpshrdv_v32hi" + | "__builtin_ia32_vpshrdv_v16hi" + | "__builtin_ia32_vpshrdv_v8hi" => { // The first two arguments are reversed, compared to LLVM. let new_args = args.to_vec(); args = vec![new_args[1], new_args[0], new_args[2]].into(); - }, + } _ => (), } } @@ -333,16 +479,27 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc args } -pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, mut return_value: RValue<'gcc>, func_name: &str, args: &[RValue<'gcc>], args_adjusted: bool, orig_args: &[RValue<'gcc>]) -> RValue<'gcc> { +pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>( + builder: &Builder<'a, 'gcc, 'tcx>, + mut return_value: RValue<'gcc>, + func_name: &str, + args: &[RValue<'gcc>], + args_adjusted: bool, + orig_args: &[RValue<'gcc>], +) -> RValue<'gcc> { match func_name { "__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => { - #[cfg(feature="master")] + #[cfg(feature = "master")] { let zero = builder.context.new_rvalue_zero(builder.int_type); - return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue(); + return_value = + builder.context.new_vector_access(None, return_value, zero).to_rvalue(); } - }, - "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => { + } + "__builtin_ia32_addcarryx_u64" + | "__builtin_ia32_sbb_u64" + | "__builtin_ia32_addcarryx_u32" + | "__builtin_ia32_sbb_u32" => { // Both llvm.x86.addcarry.32 and llvm.x86.addcarryx.u32 points to the same GCC builtin, // but only the former requires adjusting the return value. 
// Those 2 LLVM intrinsics differ by their argument count, that's why we check if the @@ -351,10 +508,16 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, let last_arg = args.last().expect("last arg"); let field1 = builder.context.new_field(None, builder.u8_type, "carryFlag"); let field2 = builder.context.new_field(None, args[1].get_type(), "carryResult"); - let struct_type = builder.context.new_struct_type(None, "addcarryResult", &[field1, field2]); - return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[return_value, last_arg.dereference(None).to_rvalue()]); + let struct_type = + builder.context.new_struct_type(None, "addcarryResult", &[field1, field2]); + return_value = builder.context.new_struct_constructor( + None, + struct_type.as_type(), + None, + &[return_value, last_arg.dereference(None).to_rvalue()], + ); } - }, + } "__builtin_ia32_stmxcsr" => { // The builtin __builtin_ia32_stmxcsr returns a value while llvm.x86.sse.stmxcsr writes // the result in its pointer argument. @@ -366,20 +529,24 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, // The return value was assigned to the result pointer above. In order to not call the // builtin twice, we overwrite the return value with a dummy value. return_value = builder.context.new_rvalue_zero(builder.int_type); - }, + } "__builtin_ia32_rdrand64_step" => { let random_number = args[0].dereference(None).to_rvalue(); - let success_variable = builder.current_func().new_local(None, return_value.get_type(), "success"); + let success_variable = + builder.current_func().new_local(None, return_value.get_type(), "success"); builder.llbb().add_assignment(None, success_variable, return_value); let field1 = builder.context.new_field(None, random_number.get_type(), "random_number"); let field2 = builder.context.new_field(None, return_value.get_type(), "success"); - let struct_type = builder.context.new_struct_type(None, "rdrand_result", &[field1, field2]); - return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[ - random_number, - success_variable.to_rvalue(), - ]); - }, + let struct_type = + builder.context.new_struct_type(None, "rdrand_result", &[field1, field2]); + return_value = builder.context.new_struct_constructor( + None, + struct_type.as_type(), + None, + &[random_number, success_variable.to_rvalue()], + ); + } _ => (), } @@ -391,23 +558,33 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool { match func_name { // NOTE: these intrinsics have missing parameters before the last one, so ignore the // last argument type check. 
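Editor's note: the add-with-carry arm above repacks the GCC builtin's two outputs (carry returned, sum written through the "addcarryResult" out-pointer pushed earlier) into the (carry, sum) pair that LLVM's intrinsic is expected to produce. A plain-Rust analogue of the repacking, illustrative only:

```rust
// What the GCC builtin computes vs. what the LLVM intrinsic returns:
// carry flag plus out-pointer on one side, a (carry, sum) pair on the other.
fn addcarry_u64(carry_in: u8, a: u64, b: u64) -> (u8, u64) {
    let (sum, c1) = a.overflowing_add(b);
    let (sum, c2) = sum.overflowing_add(carry_in as u64);
    // adjust_intrinsic_return_value bundles (carryFlag, carryResult) the same way.
    ((c1 | c2) as u8, sum)
}

fn main() {
    assert_eq!(addcarry_u64(1, u64::MAX, 0), (1, 0)); // wraps, carry out set
    assert_eq!(addcarry_u64(0, 2, 3), (0, 5));
}
```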
- "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" - | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask" - | "__builtin_ia32_sqrtpd512_mask" | "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask" - | "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask" - | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask" - | "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask" - | "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" - | "__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" => { - if index == args_len - 1 { - return true; - } - }, + "__builtin_ia32_maxps512_mask" + | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_minps512_mask" + | "__builtin_ia32_minpd512_mask" + | "__builtin_ia32_sqrtps512_mask" + | "__builtin_ia32_sqrtpd512_mask" + | "__builtin_ia32_addps512_mask" + | "__builtin_ia32_addpd512_mask" + | "__builtin_ia32_subps512_mask" + | "__builtin_ia32_subpd512_mask" + | "__builtin_ia32_mulps512_mask" + | "__builtin_ia32_mulpd512_mask" + | "__builtin_ia32_divps512_mask" + | "__builtin_ia32_divpd512_mask" + | "__builtin_ia32_vfmaddsubps512_mask" + | "__builtin_ia32_vfmaddsubpd512_mask" + | "__builtin_ia32_cvtdq2ps512_mask" + | "__builtin_ia32_cvtudq2ps512_mask" => { + if index == args_len - 1 { + return true; + } + } "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => { if index == 2 || index == 3 { return true; } - }, + } "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => { // Since there are two LLVM intrinsics that map to each of these GCC builtins and only // one of them has a missing parameter before the last one, we check the number of @@ -415,49 +592,50 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool { if args_len == 4 && index == args_len - 1 { return true; } - }, + } // NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors. "__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => return true, - "__builtin_ia32_vplzcntd_512_mask" | "__builtin_ia32_vplzcntd_256_mask" | "__builtin_ia32_vplzcntd_128_mask" - | "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask" => { + "__builtin_ia32_vplzcntd_512_mask" + | "__builtin_ia32_vplzcntd_256_mask" + | "__builtin_ia32_vplzcntd_128_mask" + | "__builtin_ia32_vplzcntq_512_mask" + | "__builtin_ia32_vplzcntq_256_mask" + | "__builtin_ia32_vplzcntq_128_mask" => { if index == args_len - 1 { return true; } - }, + } _ => (), } false } -#[cfg(not(feature="master"))] +#[cfg(not(feature = "master"))] pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> { - let gcc_name = - match name { - "llvm.x86.sse2.pause" => { - // NOTE: pause is only a hint, so we use a dummy built-in because target built-ins - // are not supported in libgccjit 12. - "__builtin_inff" - }, - "llvm.x86.xgetbv" => { - "__builtin_trap" - }, - _ => unimplemented!("unsupported LLVM intrinsic {}", name), - }; + let gcc_name = match name { + "llvm.x86.sse2.pause" => { + // NOTE: pause is only a hint, so we use a dummy built-in because target built-ins + // are not supported in libgccjit 12. 
+ "__builtin_inff" + } + "llvm.x86.xgetbv" => "__builtin_trap", + _ => unimplemented!("unsupported LLVM intrinsic {}", name), + }; let func = cx.context.get_builtin_function(gcc_name); cx.functions.borrow_mut().insert(gcc_name.to_string(), func); return func; } -#[cfg(feature="master")] +#[cfg(feature = "master")] pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> { match name { "llvm.prefetch" => { let gcc_name = "__builtin_prefetch"; let func = cx.context.get_builtin_function(gcc_name); cx.functions.borrow_mut().insert(gcc_name.to_string(), func); - return func - }, + return func; + } _ => (), } diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs index d43f5d74757..a6c8b72e851 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs @@ -1,43 +1,48 @@ pub mod llvm; mod simd; -#[cfg(feature="master")] +#[cfg(feature = "master")] use std::iter; -#[cfg(feature="master")] +#[cfg(feature = "master")] use gccjit::FunctionType; use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp}; -use rustc_codegen_ssa::MemFlags; use rustc_codegen_ssa::base::wants_msvc_seh; use rustc_codegen_ssa::common::IntPredicate; +use rustc_codegen_ssa::errors::InvalidMonomorphization; use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue}; use rustc_codegen_ssa::mir::place::PlaceRef; -use rustc_codegen_ssa::traits::{ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods}; -#[cfg(feature="master")] +use rustc_codegen_ssa::traits::{ + ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods, +}; +#[cfg(feature = "master")] use rustc_codegen_ssa::traits::{BaseTypeMethods, MiscMethods}; -use rustc_codegen_ssa::errors::InvalidMonomorphization; +use rustc_codegen_ssa::MemFlags; use rustc_middle::bug; -use rustc_middle::ty::{self, Instance, Ty}; use rustc_middle::ty::layout::LayoutOf; -#[cfg(feature="master")] +#[cfg(feature = "master")] use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt}; -use rustc_span::{Span, Symbol, sym}; -use rustc_target::abi::HasDataLayout; +use rustc_middle::ty::{self, Instance, Ty}; +use rustc_span::{sym, Span, Symbol}; use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode}; -use rustc_target::spec::PanicStrategy; -#[cfg(feature="master")] +use rustc_target::abi::HasDataLayout; +#[cfg(feature = "master")] use rustc_target::spec::abi::Abi; +use rustc_target::spec::PanicStrategy; -use crate::abi::GccType; -#[cfg(feature="master")] +#[cfg(feature = "master")] use crate::abi::FnAbiGccExt; +use crate::abi::GccType; use crate::builder::Builder; use crate::common::{SignType, TypeReflection}; use crate::context::CodegenCx; -use crate::type_of::LayoutGccExt; use crate::intrinsic::simd::generic_simd_intrinsic; +use crate::type_of::LayoutGccExt; -fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) -> Option<Function<'gcc>> { +fn get_simple_intrinsic<'gcc, 'tcx>( + cx: &CodegenCx<'gcc, 'tcx>, + name: Symbol, +) -> Option<Function<'gcc>> { let gcc_name = match name { sym::sqrtf32 => "sqrtf", sym::sqrtf64 => "sqrt", @@ -90,7 +95,14 @@ fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) -> } impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { - fn codegen_intrinsic_call(&mut self, instance: Instance<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, args: &[OperandRef<'tcx, RValue<'gcc>>], llresult: RValue<'gcc>, span: Span) -> Result<(), Instance<'tcx>> { + 
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index d43f5d74757..a6c8b72e851 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -1,43 +1,48 @@
 pub mod llvm;
 mod simd;
 
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use std::iter;
 
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use gccjit::FunctionType;
 use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp};
-use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::base::wants_msvc_seh;
 use rustc_codegen_ssa::common::IntPredicate;
+use rustc_codegen_ssa::errors::InvalidMonomorphization;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::PlaceRef;
-use rustc_codegen_ssa::traits::{ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
-#[cfg(feature="master")]
+use rustc_codegen_ssa::traits::{
+    ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods,
+};
+#[cfg(feature = "master")]
 use rustc_codegen_ssa::traits::{BaseTypeMethods, MiscMethods};
-use rustc_codegen_ssa::errors::InvalidMonomorphization;
+use rustc_codegen_ssa::MemFlags;
 use rustc_middle::bug;
-use rustc_middle::ty::{self, Instance, Ty};
 use rustc_middle::ty::layout::LayoutOf;
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt};
-use rustc_span::{Span, Symbol, sym};
-use rustc_target::abi::HasDataLayout;
+use rustc_middle::ty::{self, Instance, Ty};
+use rustc_span::{sym, Span, Symbol};
 use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
-use rustc_target::spec::PanicStrategy;
-#[cfg(feature="master")]
+use rustc_target::abi::HasDataLayout;
+#[cfg(feature = "master")]
 use rustc_target::spec::abi::Abi;
+use rustc_target::spec::PanicStrategy;
 
-use crate::abi::GccType;
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use crate::abi::FnAbiGccExt;
+use crate::abi::GccType;
 use crate::builder::Builder;
 use crate::common::{SignType, TypeReflection};
 use crate::context::CodegenCx;
-use crate::type_of::LayoutGccExt;
 use crate::intrinsic::simd::generic_simd_intrinsic;
+use crate::type_of::LayoutGccExt;
 
-fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) -> Option<Function<'gcc>> {
+fn get_simple_intrinsic<'gcc, 'tcx>(
+    cx: &CodegenCx<'gcc, 'tcx>,
+    name: Symbol,
+) -> Option<Function<'gcc>> {
     let gcc_name = match name {
         sym::sqrtf32 => "sqrtf",
         sym::sqrtf64 => "sqrt",
@@ -90,7 +95,14 @@ fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) ->
 }
 
 impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
-    fn codegen_intrinsic_call(&mut self, instance: Instance<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, args: &[OperandRef<'tcx, RValue<'gcc>>], llresult: RValue<'gcc>, span: Span) -> Result<(), Instance<'tcx>> {
+    fn codegen_intrinsic_call(
+        &mut self,
+        instance: Instance<'tcx>,
+        fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
+        args: &[OperandRef<'tcx, RValue<'gcc>>],
+        llresult: RValue<'gcc>,
+        span: Span,
+    ) -> Result<(), Instance<'tcx>> {
         let tcx = self.tcx;
         let callee_ty = instance.ty(tcx, ty::ParamEnv::reveal_all());
@@ -110,268 +122,274 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
         let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
 
         let simple = get_simple_intrinsic(self, name);
-        let llval =
-            match name {
-                _ if simple.is_some() => {
-                    // FIXME(antoyo): remove this cast when the API supports function.
-                    let func = unsafe { std::mem::transmute(simple.expect("simple")) };
-                    self.call(self.type_void(), None, None, func, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None)
-                },
-                sym::likely => {
-                    self.expect(args[0].immediate(), true)
-                }
-                sym::unlikely => {
-                    self.expect(args[0].immediate(), false)
-                }
-                sym::is_val_statically_known => {
-                    let a = args[0].immediate();
-                    let builtin = self.context.get_builtin_function("__builtin_constant_p");
-                    let res = self.context.new_call(None, builtin, &[a]);
-                    self.icmp(IntPredicate::IntEQ, res, self.const_i32(0))
-                }
-                sym::catch_unwind => {
-                    try_intrinsic(
-                        self,
-                        args[0].immediate(),
-                        args[1].immediate(),
-                        args[2].immediate(),
-                        llresult,
-                    );
-                    return Ok(());
-                }
-                sym::breakpoint => {
-                    unimplemented!();
-                }
-                sym::va_copy => {
-                    unimplemented!();
-                }
-                sym::va_arg => {
-                    unimplemented!();
-                }
-                sym::volatile_load | sym::unaligned_volatile_load => {
-                    let tp_ty = fn_args.type_at(0);
-                    let ptr = args[0].immediate();
-                    let load =
-                        if let PassMode::Cast { cast: ty, pad_i32: _ } = &fn_abi.ret.mode {
-                            let gcc_ty = ty.gcc_type(self);
-                            self.volatile_load(gcc_ty, ptr)
-                        }
-                        else {
-                            self.volatile_load(self.layout_of(tp_ty).gcc_type(self), ptr)
-                        };
-                    // TODO(antoyo): set alignment.
-                    self.to_immediate(load, self.layout_of(tp_ty))
-                }
-                sym::volatile_store => {
-                    let dst = args[0].deref(self.cx());
-                    args[1].val.volatile_store(self, dst);
-                    return Ok(());
-                }
-                sym::unaligned_volatile_store => {
-                    let dst = args[0].deref(self.cx());
-                    args[1].val.unaligned_volatile_store(self, dst);
-                    return Ok(());
-                }
-                sym::prefetch_read_data
-                | sym::prefetch_write_data
-                | sym::prefetch_read_instruction
-                | sym::prefetch_write_instruction => {
-                    unimplemented!();
-                }
-                sym::ctlz
-                | sym::ctlz_nonzero
-                | sym::cttz
-                | sym::cttz_nonzero
-                | sym::ctpop
-                | sym::bswap
-                | sym::bitreverse
-                | sym::rotate_left
-                | sym::rotate_right
-                | sym::saturating_add
-                | sym::saturating_sub => {
-                    let ty = arg_tys[0];
-                    match int_type_width_signed(ty, self) {
-                        Some((width, signed)) => match name {
-                            sym::ctlz | sym::cttz => {
-                                let func = self.current_func.borrow().expect("func");
-                                let then_block = func.new_block("then");
-                                let else_block = func.new_block("else");
-                                let after_block = func.new_block("after");
-
-                                let arg = args[0].immediate();
-                                let result = func.new_local(None, arg.get_type(), "zeros");
-                                let zero = self.cx.gcc_zero(arg.get_type());
-                                let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
-                                self.llbb().end_with_conditional(None, cond, then_block, else_block);
-
-                                let zero_result = self.cx.gcc_uint(arg.get_type(), width);
-                                then_block.add_assignment(None, result, zero_result);
-                                then_block.end_with_jump(None, after_block);
-
-                                // NOTE: since jumps were added in a place
-                                // count_leading_zeroes() does not expect, the current block
-                                // in the state need to be updated.
-                                self.switch_to_block(else_block);
-
-                                let zeros =
-                                    match name {
-                                        sym::ctlz => self.count_leading_zeroes(width, arg),
-                                        sym::cttz => self.count_trailing_zeroes(width, arg),
-                                        _ => unreachable!(),
-                                    };
-                                self.llbb().add_assignment(None, result, zeros);
-                                self.llbb().end_with_jump(None, after_block);
-
-                                // NOTE: since jumps were added in a place rustc does not
-                                // expect, the current block in the state need to be updated.
-                                self.switch_to_block(after_block);
-
-                                result.to_rvalue()
-                            }
-                            sym::ctlz_nonzero => {
-                                self.count_leading_zeroes(width, args[0].immediate())
-                            },
-                            sym::cttz_nonzero => {
-                                self.count_trailing_zeroes(width, args[0].immediate())
-                            }
-                            sym::ctpop => self.pop_count(args[0].immediate()),
-                            sym::bswap => {
-                                if width == 8 {
-                                    args[0].immediate() // byte swap a u8/i8 is just a no-op
-                                }
-                                else {
-                                    self.gcc_bswap(args[0].immediate(), width)
-                                }
-                            },
-                            sym::bitreverse => self.bit_reverse(width, args[0].immediate()),
-                            sym::rotate_left | sym::rotate_right => {
-                                // TODO(antoyo): implement using algorithm from:
-                                // https://blog.regehr.org/archives/1063
-                                // for other platforms.
-                                let is_left = name == sym::rotate_left;
-                                let val = args[0].immediate();
-                                let raw_shift = args[1].immediate();
-                                if is_left {
-                                    self.rotate_left(val, raw_shift, width)
-                                }
-                                else {
-                                    self.rotate_right(val, raw_shift, width)
-                                }
-                            },
-                            sym::saturating_add => {
-                                self.saturating_add(args[0].immediate(), args[1].immediate(), signed, width)
-                            },
-                            sym::saturating_sub => {
-                                self.saturating_sub(args[0].immediate(), args[1].immediate(), signed, width)
-                            },
-                            _ => bug!(),
-                        },
-                        None => {
-                            tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty });
-                            return Ok(());
-                        }
-                    }
-                }
-
-                sym::raw_eq => {
-                    use rustc_target::abi::Abi::*;
-                    let tp_ty = fn_args.type_at(0);
-                    let layout = self.layout_of(tp_ty).layout;
-                    let _use_integer_compare = match layout.abi() {
-                        Scalar(_) | ScalarPair(_, _) => true,
-                        Uninhabited | Vector { .. } => false,
-                        Aggregate { .. } => {
-                            // For rusty ABIs, small aggregates are actually passed
-                            // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
-                            // so we re-use that same threshold here.
-                            layout.size() <= self.data_layout().pointer_size * 2
-                        }
-                    };
-
-                    let a = args[0].immediate();
-                    let b = args[1].immediate();
-                    if layout.size().bytes() == 0 {
-                        self.const_bool(true)
-                    }
-                    /*else if use_integer_compare {
-                        let integer_ty = self.type_ix(layout.size.bits()); // FIXME(antoyo): LLVM creates an integer of 96 bits for [i32; 3], but gcc doesn't support this, so it creates an integer of 128 bits.
-                        let ptr_ty = self.type_ptr_to(integer_ty);
-                        let a_ptr = self.bitcast(a, ptr_ty);
-                        let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
-                        let b_ptr = self.bitcast(b, ptr_ty);
-                        let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
-                        self.icmp(IntPredicate::IntEQ, a_val, b_val)
-                    }*/
-                    else {
-                        let void_ptr_type = self.context.new_type::<*const ()>();
-                        let a_ptr = self.bitcast(a, void_ptr_type);
-                        let b_ptr = self.bitcast(b, void_ptr_type);
-                        let n = self.context.new_cast(None, self.const_usize(layout.size().bytes()), self.sizet_type);
-                        let builtin = self.context.get_builtin_function("memcmp");
-                        let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
-                        self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
-                    }
-                }
-
-                sym::compare_bytes => {
-                    let a = args[0].immediate();
-                    let b = args[1].immediate();
-                    let n = args[2].immediate();
-
-                    let void_ptr_type = self.context.new_type::<*const ()>();
-                    let a_ptr = self.bitcast(a, void_ptr_type);
-                    let b_ptr = self.bitcast(b, void_ptr_type);
-
-                    // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
-                    let builtin = self.context.get_builtin_function("memcmp");
-                    let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
-                    self.sext(cmp, self.type_ix(32))
-                }
-
-                sym::black_box => {
-                    args[0].val.store(self, result);
-
-                    let block = self.llbb();
-                    let extended_asm = block.add_extended_asm(None, "");
-                    extended_asm.add_input_operand(None, "r", result.llval);
-                    extended_asm.add_clobber("memory");
-                    extended_asm.set_volatile_flag(true);
-
-                    // We have copied the value to `result` already.
-                    return Ok(());
-                }
-
-                sym::ptr_mask => {
-                    let usize_type = self.context.new_type::<usize>();
-                    let void_ptr_type = self.context.new_type::<*const ()>();
-
-                    let ptr = args[0].immediate();
-                    let mask = args[1].immediate();
-
-                    let addr = self.bitcast(ptr, usize_type);
-                    let masked = self.and(addr, mask);
-                    self.bitcast(masked, void_ptr_type)
-                },
-
-                _ if name_str.starts_with("simd_") => {
-                    match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
-                        Ok(llval) => llval,
-                        Err(()) => return Ok(()),
-                    }
-                }
-
-                // Fall back to default body
-                _ => return Err(Instance::new(instance.def_id(), instance.args)),
-            };
+        let llval = match name {
+            _ if simple.is_some() => {
+                // FIXME(antoyo): remove this cast when the API supports function.
+                let func = unsafe { std::mem::transmute(simple.expect("simple")) };
+                self.call(
+                    self.type_void(),
+                    None,
+                    None,
+                    func,
+                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
+                    None,
+                )
+            }
+            sym::likely => self.expect(args[0].immediate(), true),
+            sym::unlikely => self.expect(args[0].immediate(), false),
+            sym::is_val_statically_known => {
+                let a = args[0].immediate();
+                let builtin = self.context.get_builtin_function("__builtin_constant_p");
+                let res = self.context.new_call(None, builtin, &[a]);
+                self.icmp(IntPredicate::IntEQ, res, self.const_i32(0))
+            }
+            sym::catch_unwind => {
+                try_intrinsic(
+                    self,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    args[2].immediate(),
+                    llresult,
+                );
+                return Ok(());
+            }
+            sym::breakpoint => {
+                unimplemented!();
+            }
+            sym::va_copy => {
+                unimplemented!();
+            }
+            sym::va_arg => {
+                unimplemented!();
+            }
+
+            sym::volatile_load | sym::unaligned_volatile_load => {
+                let tp_ty = fn_args.type_at(0);
+                let ptr = args[0].immediate();
+                let load = if let PassMode::Cast { cast: ty, pad_i32: _ } = &fn_abi.ret.mode {
+                    let gcc_ty = ty.gcc_type(self);
+                    self.volatile_load(gcc_ty, ptr)
+                } else {
+                    self.volatile_load(self.layout_of(tp_ty).gcc_type(self), ptr)
+                };
+                // TODO(antoyo): set alignment.
+                self.to_immediate(load, self.layout_of(tp_ty))
+            }
+            sym::volatile_store => {
+                let dst = args[0].deref(self.cx());
+                args[1].val.volatile_store(self, dst);
+                return Ok(());
+            }
+            sym::unaligned_volatile_store => {
+                let dst = args[0].deref(self.cx());
+                args[1].val.unaligned_volatile_store(self, dst);
+                return Ok(());
+            }
+            sym::prefetch_read_data
+            | sym::prefetch_write_data
+            | sym::prefetch_read_instruction
+            | sym::prefetch_write_instruction => {
+                unimplemented!();
+            }
+            sym::ctlz
+            | sym::ctlz_nonzero
+            | sym::cttz
+            | sym::cttz_nonzero
+            | sym::ctpop
+            | sym::bswap
+            | sym::bitreverse
+            | sym::rotate_left
+            | sym::rotate_right
+            | sym::saturating_add
+            | sym::saturating_sub => {
+                let ty = arg_tys[0];
+                match int_type_width_signed(ty, self) {
+                    Some((width, signed)) => match name {
+                        sym::ctlz | sym::cttz => {
+                            let func = self.current_func.borrow().expect("func");
+                            let then_block = func.new_block("then");
+                            let else_block = func.new_block("else");
+                            let after_block = func.new_block("after");
+
+                            let arg = args[0].immediate();
+                            let result = func.new_local(None, arg.get_type(), "zeros");
+                            let zero = self.cx.gcc_zero(arg.get_type());
+                            let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
+                            self.llbb().end_with_conditional(None, cond, then_block, else_block);
+
+                            let zero_result = self.cx.gcc_uint(arg.get_type(), width);
+                            then_block.add_assignment(None, result, zero_result);
+                            then_block.end_with_jump(None, after_block);
+
+                            // NOTE: since jumps were added in a place
+                            // count_leading_zeroes() does not expect, the current block
+                            // in the state need to be updated.
+                            self.switch_to_block(else_block);
+
+                            let zeros = match name {
+                                sym::ctlz => self.count_leading_zeroes(width, arg),
+                                sym::cttz => self.count_trailing_zeroes(width, arg),
+                                _ => unreachable!(),
+                            };
+                            self.llbb().add_assignment(None, result, zeros);
+                            self.llbb().end_with_jump(None, after_block);
+
+                            // NOTE: since jumps were added in a place rustc does not
+                            // expect, the current block in the state need to be updated.
+                            self.switch_to_block(after_block);
+
+                            result.to_rvalue()
+                        }
+                        sym::ctlz_nonzero => self.count_leading_zeroes(width, args[0].immediate()),
+                        sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()),
+                        sym::ctpop => self.pop_count(args[0].immediate()),
+                        sym::bswap => {
+                            if width == 8 {
+                                args[0].immediate() // byte swap a u8/i8 is just a no-op
+                            } else {
+                                self.gcc_bswap(args[0].immediate(), width)
+                            }
+                        }
+                        sym::bitreverse => self.bit_reverse(width, args[0].immediate()),
+                        sym::rotate_left | sym::rotate_right => {
+                            // TODO(antoyo): implement using algorithm from:
+                            // https://blog.regehr.org/archives/1063
+                            // for other platforms.
+                            let is_left = name == sym::rotate_left;
+                            let val = args[0].immediate();
+                            let raw_shift = args[1].immediate();
+                            if is_left {
+                                self.rotate_left(val, raw_shift, width)
+                            } else {
+                                self.rotate_right(val, raw_shift, width)
+                            }
+                        }
+                        sym::saturating_add => self.saturating_add(
+                            args[0].immediate(),
+                            args[1].immediate(),
+                            signed,
+                            width,
+                        ),
+                        sym::saturating_sub => self.saturating_sub(
+                            args[0].immediate(),
+                            args[1].immediate(),
+                            signed,
+                            width,
+                        ),
+                        _ => bug!(),
+                    },
+                    None => {
+                        tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
+                            span,
+                            name,
+                            ty,
+                        });
+                        return Ok(());
+                    }
+                }
+            }
+
+            sym::raw_eq => {
+                use rustc_target::abi::Abi::*;
+                let tp_ty = fn_args.type_at(0);
+                let layout = self.layout_of(tp_ty).layout;
+                let _use_integer_compare = match layout.abi() {
+                    Scalar(_) | ScalarPair(_, _) => true,
+                    Uninhabited | Vector { .. } => false,
+                    Aggregate { .. } => {
+                        // For rusty ABIs, small aggregates are actually passed
+                        // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
+                        // so we re-use that same threshold here.
+                        layout.size() <= self.data_layout().pointer_size * 2
+                    }
+                };
+
+                let a = args[0].immediate();
+                let b = args[1].immediate();
+                if layout.size().bytes() == 0 {
+                    self.const_bool(true)
+                }
+                /*else if use_integer_compare {
+                    let integer_ty = self.type_ix(layout.size.bits()); // FIXME(antoyo): LLVM creates an integer of 96 bits for [i32; 3], but gcc doesn't support this, so it creates an integer of 128 bits.
+                    let ptr_ty = self.type_ptr_to(integer_ty);
+                    let a_ptr = self.bitcast(a, ptr_ty);
+                    let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
+                    let b_ptr = self.bitcast(b, ptr_ty);
+                    let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
+                    self.icmp(IntPredicate::IntEQ, a_val, b_val)
+                }*/
+                else {
+                    let void_ptr_type = self.context.new_type::<*const ()>();
+                    let a_ptr = self.bitcast(a, void_ptr_type);
+                    let b_ptr = self.bitcast(b, void_ptr_type);
+                    let n = self.context.new_cast(
+                        None,
+                        self.const_usize(layout.size().bytes()),
+                        self.sizet_type,
+                    );
+                    let builtin = self.context.get_builtin_function("memcmp");
+                    let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
+                    self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
+                }
+            }
+
+            sym::compare_bytes => {
+                let a = args[0].immediate();
+                let b = args[1].immediate();
+                let n = args[2].immediate();
+
+                let void_ptr_type = self.context.new_type::<*const ()>();
+                let a_ptr = self.bitcast(a, void_ptr_type);
+                let b_ptr = self.bitcast(b, void_ptr_type);
+
+                // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
+                let builtin = self.context.get_builtin_function("memcmp");
+                let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
+                self.sext(cmp, self.type_ix(32))
+            }
+
+            sym::black_box => {
+                args[0].val.store(self, result);
+
+                let block = self.llbb();
+                let extended_asm = block.add_extended_asm(None, "");
+                extended_asm.add_input_operand(None, "r", result.llval);
+                extended_asm.add_clobber("memory");
+                extended_asm.set_volatile_flag(true);
+
+                // We have copied the value to `result` already.
+                return Ok(());
+            }
+
+            sym::ptr_mask => {
+                let usize_type = self.context.new_type::<usize>();
+                let void_ptr_type = self.context.new_type::<*const ()>();
+
+                let ptr = args[0].immediate();
+                let mask = args[1].immediate();
+
+                let addr = self.bitcast(ptr, usize_type);
+                let masked = self.and(addr, mask);
+                self.bitcast(masked, void_ptr_type)
+            }
+
+            _ if name_str.starts_with("simd_") => {
+                match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
+                    Ok(llval) => llval,
+                    Err(()) => return Ok(()),
+                }
+            }
+
+            // Fall back to default body
+            _ => return Err(Instance::new(instance.def_id(), instance.args)),
+        };
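Editor's note: of the arms above, `ptr_mask` is the simplest to see end to end: the pointer is bitcast to an integer, ANDed with the mask, and cast back. A plain-Rust sketch of the same computation (illustrative, not the crate's API):

```rust
// What the ptr_mask lowering computes: (ptr as usize & mask) as pointer.
fn ptr_mask(ptr: *const u8, mask: usize) -> *const u8 {
    let addr = ptr as usize;
    (addr & mask) as *const u8
}

fn main() {
    let data = [0u8; 32];
    let p = data.as_ptr().wrapping_add(5);
    // Typical use: align a pointer down to a 4-byte boundary.
    let aligned = ptr_mask(p, !3usize);
    assert_eq!(aligned as usize % 4, 0);
}
```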
} = &fn_abi.ret.mode { let ptr_llty = self.type_ptr_to(ty.gcc_type(self)); let ptr = self.pointercast(result.llval, ptr_llty); self.store(llval, ptr, result.align); - } - else { + } else { OperandRef::from_immediate_or_packed_pair(self, llval, result.layout) .val .store(self, result); @@ -423,11 +441,21 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { } impl<'a, 'gcc, 'tcx> ArgAbiMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { - fn store_fn_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, idx: &mut usize, dst: PlaceRef<'tcx, Self::Value>) { + fn store_fn_arg( + &mut self, + arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, + idx: &mut usize, + dst: PlaceRef<'tcx, Self::Value>, + ) { arg_abi.store_fn_arg(self, idx, dst) } - fn store_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) { + fn store_arg( + &mut self, + arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, + val: RValue<'gcc>, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ) { arg_abi.store(self, val, dst) } @@ -438,8 +466,18 @@ impl<'a, 'gcc, 'tcx> ArgAbiMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { pub trait ArgAbiExt<'gcc, 'tcx> { fn memory_ty(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc>; - fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>); - fn store_fn_arg(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>); + fn store( + &self, + bx: &mut Builder<'_, 'gcc, 'tcx>, + val: RValue<'gcc>, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ); + fn store_fn_arg( + &self, + bx: &mut Builder<'_, 'gcc, 'tcx>, + idx: &mut usize, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ); } impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { @@ -453,17 +491,20 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { /// place for the original Rust type of this argument/return. /// Can be used for both storing formal arguments into Rust variables /// or results of call/invoke instructions into their destinations. - fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) { + fn store( + &self, + bx: &mut Builder<'_, 'gcc, 'tcx>, + val: RValue<'gcc>, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ) { if self.is_ignore() { return; } if self.is_sized_indirect() { OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst) - } - else if self.is_unsized_indirect() { + } else if self.is_unsized_indirect() { bug!("unsized `ArgAbi` must be handled through `store_fn_arg`"); - } - else if let PassMode::Cast { ref cast, .. } = self.mode { + } else if let PassMode::Cast { ref cast, .. } = self.mode { // FIXME(eddyb): Figure out when the simpler Store is safe, clang // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}. let can_store_through_cast_ptr = false; @@ -471,8 +512,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { let cast_ptr_llty = bx.type_ptr_to(cast.gcc_type(bx)); let cast_dst = bx.pointercast(dst.llval, cast_ptr_llty); bx.store(val, cast_dst, self.layout.align.abi); - } - else { + } else { // The actual return type is a struct, but the ABI // adaptation code has cast it into some scalar type. 
The // code that follows is the only reliable way I have @@ -508,35 +548,44 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { bx.lifetime_end(llscratch, scratch_size); } - } - else { + } else { OperandValue::Immediate(val).store(bx, dst); } } - fn store_fn_arg<'a>(&self, bx: &mut Builder<'a, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>) { + fn store_fn_arg<'a>( + &self, + bx: &mut Builder<'a, 'gcc, 'tcx>, + idx: &mut usize, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ) { let mut next = || { let val = bx.current_func().get_param(*idx as i32); *idx += 1; val.to_rvalue() }; match self.mode { - PassMode::Ignore => {}, + PassMode::Ignore => {} PassMode::Pair(..) => { OperandValue::Pair(next(), next()).store(bx, dst); - }, + } PassMode::Indirect { meta_attrs: Some(_), .. } => { OperandValue::Ref(next(), Some(next()), self.layout.align.abi).store(bx, dst); - }, - PassMode::Direct(_) | PassMode::Indirect { meta_attrs: None, .. } | PassMode::Cast { .. } => { + } + PassMode::Direct(_) + | PassMode::Indirect { meta_attrs: None, .. } + | PassMode::Cast { .. } => { let next_arg = next(); self.store(bx, next_arg, dst); - }, + } } } } -fn int_type_width_signed<'gcc, 'tcx>(ty: Ty<'tcx>, cx: &CodegenCx<'gcc, 'tcx>) -> Option<(u64, bool)> { +fn int_type_width_signed<'gcc, 'tcx>( + ty: Ty<'tcx>, + cx: &CodegenCx<'gcc, 'tcx>, +) -> Option<(u64, bool)> { match ty.kind() { ty::Int(t) => Some(( match t { @@ -570,82 +619,76 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let typ = result_type.to_unsigned(self.cx); let value = - if result_type.is_signed(self.cx) { - self.gcc_int_cast(value, typ) - } - else { - value - }; + if result_type.is_signed(self.cx) { self.gcc_int_cast(value, typ) } else { value }; let context = &self.cx.context; - let result = - match width { - 8 | 16 | 32 | 64 => { - let mask = ((1u128 << width) - 1) as u64; - let (m0, m1, m2) = if width > 16 { - ( - context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64), - context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64), - context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64), - ) - } else { - ( - context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32), - context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32), - context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32), - ) - }; - let one = context.new_rvalue_from_int(typ, 1); - let two = context.new_rvalue_from_int(typ, 2); - let four = context.new_rvalue_from_int(typ, 4); - - // First step. - let left = self.lshr(value, one); - let left = self.and(left, m0); - let right = self.and(value, m0); - let right = self.shl(right, one); - let step1 = self.or(left, right); - - // Second step. - let left = self.lshr(step1, two); - let left = self.and(left, m1); - let right = self.and(step1, m1); - let right = self.shl(right, two); - let step2 = self.or(left, right); - - // Third step. - let left = self.lshr(step2, four); - let left = self.and(left, m2); - let right = self.and(step2, m2); - let right = self.shl(right, four); - let step3 = self.or(left, right); - - // Fourth step. - if width == 8 { - step3 - } else { - self.gcc_bswap(step3, width) - } - }, - 128 => { - // TODO(antoyo): find a more efficient implementation? 
- let sixty_four = self.gcc_int(typ, 64); - let right_shift = self.gcc_lshr(value, sixty_four); - let high = self.gcc_int_cast(right_shift, self.u64_type); - let low = self.gcc_int_cast(value, self.u64_type); - - let reversed_high = self.bit_reverse(64, high); - let reversed_low = self.bit_reverse(64, low); - - let new_low = self.gcc_int_cast(reversed_high, typ); - let new_high = self.shl(self.gcc_int_cast(reversed_low, typ), sixty_four); - - self.gcc_or(new_low, new_high) - }, - _ => { - panic!("cannot bit reverse with width = {}", width); - }, - }; + let result = match width { + 8 | 16 | 32 | 64 => { + let mask = ((1u128 << width) - 1) as u64; + let (m0, m1, m2) = if width > 16 { + ( + context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64), + context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64), + context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64), + ) + } else { + ( + context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32), + context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32), + context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32), + ) + }; + let one = context.new_rvalue_from_int(typ, 1); + let two = context.new_rvalue_from_int(typ, 2); + let four = context.new_rvalue_from_int(typ, 4); + + // First step. + let left = self.lshr(value, one); + let left = self.and(left, m0); + let right = self.and(value, m0); + let right = self.shl(right, one); + let step1 = self.or(left, right); + + // Second step. + let left = self.lshr(step1, two); + let left = self.and(left, m1); + let right = self.and(step1, m1); + let right = self.shl(right, two); + let step2 = self.or(left, right); + + // Third step. + let left = self.lshr(step2, four); + let left = self.and(left, m2); + let right = self.and(step2, m2); + let right = self.shl(right, four); + let step3 = self.or(left, right); + + // Fourth step. + if width == 8 { + step3 + } else { + self.gcc_bswap(step3, width) + } + } + 128 => { + // TODO(antoyo): find a more efficient implementation? 
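// Minimal scalar sketches (plain Rust, illustration only) of the two
// strategies `bit_reverse` uses: widths 8..=64 swap adjacent bits, then bit
// pairs, then nibbles, finishing with a byte swap for widths above 8, while
// 128-bit values reverse each 64-bit half and swap the halves.
fn bit_reverse_u8_model(mut v: u8) -> u8 {
    v = ((v >> 1) & 0x55) | ((v & 0x55) << 1); // swap adjacent bits
    v = ((v >> 2) & 0x33) | ((v & 0x33) << 2); // swap bit pairs
    v = ((v >> 4) & 0x0f) | ((v & 0x0f) << 4); // swap nibbles
    v
}

fn bit_reverse_u128_model(x: u128) -> u128 {
    let (high, low) = ((x >> 64) as u64, x as u64);
    // The reversed high half becomes the new low half and vice versa.
    ((low.reverse_bits() as u128) << 64) | (high.reverse_bits() as u128)
}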
+ let sixty_four = self.gcc_int(typ, 64); + let right_shift = self.gcc_lshr(value, sixty_four); + let high = self.gcc_int_cast(right_shift, self.u64_type); + let low = self.gcc_int_cast(value, self.u64_type); + + let reversed_high = self.bit_reverse(64, high); + let reversed_low = self.bit_reverse(64, low); + + let new_low = self.gcc_int_cast(reversed_high, typ); + let new_high = self.shl(self.gcc_int_cast(reversed_low, typ), sixty_four); + + self.gcc_or(new_low, new_high, self.location) + } + _ => { + panic!("cannot bit reverse with width = {}", width); + } + }; self.gcc_int_cast(result, result_type) } @@ -685,56 +728,54 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let first_elem = self.context.new_array_access(None, result, zero); let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type); self.llbb() - .add_assignment(None, first_elem, first_value); + .add_assignment(self.location, first_elem, first_value); - let second_elem = self.context.new_array_access(None, result, one); - let cast = self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), arg_type); + let second_elem = self.context.new_array_access(self.location, result, one); + let cast = self.gcc_int_cast(self.context.new_call(self.location, clzll, &[low]), arg_type); let second_value = self.add(cast, sixty_four); self.llbb() - .add_assignment(None, second_elem, second_value); + .add_assignment(self.location, second_elem, second_value); - let third_elem = self.context.new_array_access(None, result, two); + let third_elem = self.context.new_array_access(self.location, result, two); let third_value = self.const_uint(arg_type, 128); self.llbb() - .add_assignment(None, third_elem, third_value); + .add_assignment(self.location, third_elem, third_value); - let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, high); - let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low); + let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high); + let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low); let not_low_and_not_high = not_low & not_high; let index = not_high + not_low_and_not_high; // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in // gcc. // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the // compilation stage. 
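// Scalar sketch (plain Rust) of the branchless selection built above for the
// 128-bit leading-zero count: compute all three candidate answers, then pick
// one with an index derived from the zero-ness of each half.
fn clz_u128_model(x: u128) -> u32 {
    let (high, low) = ((x >> 64) as u64, x as u64);
    let candidates = [high.leading_zeros(), low.leading_zeros() + 64, 128];
    let not_high = (high == 0) as usize; // logical negation: 0 or 1
    let not_low = (low == 0) as usize;
    // index 0: high has set bits; 1: only low does; 2: x == 0
    candidates[not_high + (not_low & not_high)]
}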
- let index = self.context.new_cast(None, index, self.i32_type); + let index = self.context.new_cast(self.location, index, self.i32_type); - let res = self.context.new_array_access(None, result, index); + let res = self.context.new_array_access(self.location, result, index); return self.gcc_int_cast(res.to_rvalue(), arg_type); } else { let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll"); - let arg = self.context.new_cast(None, arg, self.ulonglong_type); + let arg = self.context.new_cast(self.location, arg, self.ulonglong_type); let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64; let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8); - let res = self.context.new_call(None, count_leading_zeroes, &[arg]) - diff; - return self.context.new_cast(None, res, arg_type); + let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff; + return self.context.new_cast(self.location, res, arg_type); }; let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes); - let res = self.context.new_call(None, count_leading_zeroes, &[arg]); - self.context.new_cast(None, res, arg_type) + let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]); + self.context.new_cast(self.location, res, arg_type) } fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { let result_type = arg.get_type(); - let arg = - if result_type.is_signed(self.cx) { - let new_type = result_type.to_unsigned(self.cx); - self.gcc_int_cast(arg, new_type) - } - else { - arg - }; + let arg = if result_type.is_signed(self.cx) { + let new_type = result_type.to_unsigned(self.cx); + self.gcc_int_cast(arg, new_type) + } else { + arg + }; let arg_type = arg.get_type(); let (count_trailing_zeroes, expected_type) = // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here @@ -766,58 +807,56 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let ctzll = self.context.get_builtin_function("__builtin_ctzll"); - let first_elem = self.context.new_array_access(None, result, zero); - let first_value = self.gcc_int_cast(self.context.new_call(None, ctzll, &[low]), arg_type); + let first_elem = self.context.new_array_access(self.location, result, zero); + let first_value = self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[low]), arg_type); self.llbb() - .add_assignment(None, first_elem, first_value); + .add_assignment(self.location, first_elem, first_value); - let second_elem = self.context.new_array_access(None, result, one); - let second_value = self.gcc_add(self.gcc_int_cast(self.context.new_call(None, ctzll, &[high]), arg_type), sixty_four); + let second_elem = self.context.new_array_access(self.location, result, one); + let second_value = self.gcc_add(self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[high]), arg_type), sixty_four); self.llbb() - .add_assignment(None, second_elem, second_value); + .add_assignment(self.location, second_elem, second_value); - let third_elem = self.context.new_array_access(None, result, two); + let third_elem = self.context.new_array_access(self.location, result, two); let third_value = self.gcc_int(arg_type, 128); self.llbb() - .add_assignment(None, third_elem, third_value); + .add_assignment(self.location, third_elem, third_value); - let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low); - let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, 
high); + let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low); + let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high); let not_low_and_not_high = not_low & not_high; let index = not_low + not_low_and_not_high; // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in // gcc. // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the // compilation stage. - let index = self.context.new_cast(None, index, self.i32_type); + let index = self.context.new_cast(self.location, index, self.i32_type); - let res = self.context.new_array_access(None, result, index); + let res = self.context.new_array_access(self.location, result, index); return self.gcc_int_cast(res.to_rvalue(), result_type); } else { let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll"); let arg_size = arg_type.get_size(); - let casted_arg = self.context.new_cast(None, arg, self.ulonglong_type); + let casted_arg = self.context.new_cast(self.location, arg, self.ulonglong_type); let byte_diff = self.ulonglong_type.get_size() as i64 - arg_size as i64; let diff = self.context.new_rvalue_from_long(self.int_type, byte_diff * 8); let mask = self.context.new_rvalue_from_long(arg_type, -1); // To get the value with all bits set. - let masked = mask & self.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, arg); - let cond = self.context.new_comparison(None, ComparisonOp::Equals, masked, mask); - let diff = diff * self.context.new_cast(None, cond, self.int_type); - let res = self.context.new_call(None, count_trailing_zeroes, &[casted_arg]) - diff; - return self.context.new_cast(None, res, result_type); + let masked = mask & self.context.new_unary_op(self.location, UnaryOp::BitwiseNegate, arg_type, arg); + let cond = self.context.new_comparison(self.location, ComparisonOp::Equals, masked, mask); + let diff = diff * self.context.new_cast(self.location, cond, self.int_type); + let res = self.context.new_call(self.location, count_trailing_zeroes, &[casted_arg]) - diff; + return self.context.new_cast(self.location, res, result_type); }; let count_trailing_zeroes = self.context.get_builtin_function(count_trailing_zeroes); - let arg = - if arg_type != expected_type { - self.context.new_cast(None, arg, expected_type) - } - else { - arg - }; - let res = self.context.new_call(None, count_trailing_zeroes, &[arg]); - self.context.new_cast(None, res, result_type) + let arg = if arg_type != expected_type { + self.context.new_cast(self.location, arg, expected_type) + } else { + arg + }; + let res = self.context.new_call(self.location, count_trailing_zeroes, &[arg]); + self.context.new_cast(self.location, res, result_type) } fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> { @@ -825,13 +864,11 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let result_type = value.get_type(); let value_type = result_type.to_unsigned(self.cx); - let value = - if result_type.is_signed(self.cx) { - self.gcc_int_cast(value, value_type) - } - else { - value - }; + let value = if result_type.is_signed(self.cx) { + self.gcc_int_cast(value, value_type) + } else { + value + }; // only break apart 128-bit ints if they're not natively supported // TODO(antoyo): remove this if/when native 128-bit integers land in libgccjit @@ -859,8 +896,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let counter = self.current_func().new_local(None, counter_type, "popcount_counter"); let val = 
self.current_func().new_local(None, value_type, "popcount_value"); let zero = self.gcc_zero(counter_type); - self.llbb().add_assignment(None, counter, zero); - self.llbb().add_assignment(None, val, value); + self.llbb().add_assignment(self.location, counter, zero); + self.llbb().add_assignment(self.location, val, value); self.br(loop_head); // check if value isn't zero @@ -874,12 +911,12 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let one = self.gcc_int(value_type, 1); let sub = self.gcc_sub(val.to_rvalue(), one); let op = self.gcc_and(val.to_rvalue(), sub); - loop_body.add_assignment(None, val, op); + loop_body.add_assignment(self.location, val, op); // counter += 1 let one = self.gcc_int(counter_type, 1); let op = self.gcc_add(counter.to_rvalue(), one); - loop_body.add_assignment(None, counter, op); + loop_body.add_assignment(self.location, counter, op); self.br(loop_head); // end of loop @@ -888,66 +925,70 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } // Algorithm from: https://blog.regehr.org/archives/1063 - fn rotate_left(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> { + fn rotate_left( + &mut self, + value: RValue<'gcc>, + shift: RValue<'gcc>, + width: u64, + ) -> RValue<'gcc> { let max = self.const_uint(shift.get_type(), width); let shift = self.urem(shift, max); let lhs = self.shl(value, shift); let result_neg = self.neg(shift); - let result_and = - self.and( - result_neg, - self.const_uint(shift.get_type(), width - 1), - ); + let result_and = self.and(result_neg, self.const_uint(shift.get_type(), width - 1)); let rhs = self.lshr(value, result_and); self.or(lhs, rhs) } // Algorithm from: https://blog.regehr.org/archives/1063 - fn rotate_right(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> { + fn rotate_right( + &mut self, + value: RValue<'gcc>, + shift: RValue<'gcc>, + width: u64, + ) -> RValue<'gcc> { let max = self.const_uint(shift.get_type(), width); let shift = self.urem(shift, max); let lhs = self.lshr(value, shift); let result_neg = self.neg(shift); - let result_and = - self.and( - result_neg, - self.const_uint(shift.get_type(), width - 1), - ); + let result_and = self.and(result_neg, self.const_uint(shift.get_type(), width - 1)); let rhs = self.shl(value, result_and); self.or(lhs, rhs) } - fn saturating_add(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> { + fn saturating_add( + &mut self, + lhs: RValue<'gcc>, + rhs: RValue<'gcc>, + signed: bool, + width: u64, + ) -> RValue<'gcc> { let result_type = lhs.get_type(); if signed { // Based on algorithm from: https://stackoverflow.com/a/56531252/389119 let func = self.current_func.borrow().expect("func"); - let res = func.new_local(None, result_type, "saturating_sum"); + let res = func.new_local(self.location, result_type, "saturating_sum"); let supports_native_type = self.is_native_int_type(result_type); - let overflow = - if supports_native_type { - let func_name = - match width { - 8 => "__builtin_add_overflow", - 16 => "__builtin_add_overflow", - 32 => "__builtin_sadd_overflow", - 64 => "__builtin_saddll_overflow", - 128 => "__builtin_add_overflow", - _ => unreachable!(), - }; - let overflow_func = self.context.get_builtin_function(func_name); - self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None) - } - else { - let func_name = - match width { - 128 => "__rust_i128_addo", - _ => unreachable!(), - }; - let (int_result, overflow) = self.operation_with_overflow(func_name, lhs, rhs); - 
self.llbb().add_assignment(None, res, int_result); - overflow + let overflow = if supports_native_type { + let func_name = match width { + 8 => "__builtin_add_overflow", + 16 => "__builtin_add_overflow", + 32 => "__builtin_sadd_overflow", + 64 => "__builtin_saddll_overflow", + 128 => "__builtin_add_overflow", + _ => unreachable!(), + }; + let overflow_func = self.context.get_builtin_function(func_name); + self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(self.location)], None) + } else { + let func_name = match width { + 128 => "__rust_i128_addo", + _ => unreachable!(), }; + let (int_result, overflow) = self.operation_with_overflow(func_name, lhs, rhs); + self.llbb().add_assignment(self.location, res, int_result); + overflow + }; let then_block = func.new_block("then"); let after_block = func.new_block("after"); @@ -955,83 +996,97 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // Return `result_type`'s maximum or minimum value on overflow // NOTE: convert the type to unsigned to have an unsigned shift. let unsigned_type = result_type.to_unsigned(&self.cx); - let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1)); + let shifted = self.gcc_lshr( + self.gcc_int_cast(lhs, unsigned_type), + self.gcc_int(unsigned_type, width as i64 - 1), + ); let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0)); let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1)); - then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type)); - then_block.end_with_jump(None, after_block); + then_block.add_assignment( + self.location, + res, + self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type), + ); + then_block.end_with_jump(self.location, after_block); - self.llbb().end_with_conditional(None, overflow, then_block, after_block); + self.llbb().end_with_conditional(self.location, overflow, then_block, after_block); // NOTE: since jumps were added in a place rustc does not // expect, the current block in the state need to be updated. 
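// The unsigned fallback a few lines below relies on a branch-free trick;
// here it is as a standalone sketch in plain Rust (u32 chosen arbitrarily):
fn saturating_add_u32_model(lhs: u32, rhs: u32) -> u32 {
    let res = lhs.wrapping_add(rhs);
    // If the addition wrapped, `res < lhs` holds; negating the comparison
    // result gives an all-ones mask that ORs the sum up to u32::MAX.
    res | 0u32.wrapping_sub((res < lhs) as u32)
}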
self.switch_to_block(after_block); res.to_rvalue() - } - else { + } else { // Algorithm from: http://locklessinc.com/articles/sat_arithmetic/ let res = self.gcc_add(lhs, rhs); let cond = self.gcc_icmp(IntPredicate::IntULT, res, lhs); let value = self.gcc_neg(self.gcc_int_cast(cond, result_type)); - self.gcc_or(res, value) + self.gcc_or(res, value, self.location) } } // Algorithm from: https://locklessinc.com/articles/sat_arithmetic/ - fn saturating_sub(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> { + fn saturating_sub( + &mut self, + lhs: RValue<'gcc>, + rhs: RValue<'gcc>, + signed: bool, + width: u64, + ) -> RValue<'gcc> { let result_type = lhs.get_type(); if signed { // Based on algorithm from: https://stackoverflow.com/a/56531252/389119 let func = self.current_func.borrow().expect("func"); - let res = func.new_local(None, result_type, "saturating_diff"); + let res = func.new_local(self.location, result_type, "saturating_diff"); let supports_native_type = self.is_native_int_type(result_type); - let overflow = - if supports_native_type { - let func_name = - match width { - 8 => "__builtin_sub_overflow", - 16 => "__builtin_sub_overflow", - 32 => "__builtin_ssub_overflow", - 64 => "__builtin_ssubll_overflow", - 128 => "__builtin_sub_overflow", - _ => unreachable!(), - }; - let overflow_func = self.context.get_builtin_function(func_name); - self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None) - } - else { - let func_name = - match width { - 128 => "__rust_i128_subo", - _ => unreachable!(), - }; - let (int_result, overflow) = self.operation_with_overflow(func_name, lhs, rhs); - self.llbb().add_assignment(None, res, int_result); - overflow + let overflow = if supports_native_type { + let func_name = match width { + 8 => "__builtin_sub_overflow", + 16 => "__builtin_sub_overflow", + 32 => "__builtin_ssub_overflow", + 64 => "__builtin_ssubll_overflow", + 128 => "__builtin_sub_overflow", + _ => unreachable!(), }; + let overflow_func = self.context.get_builtin_function(func_name); + self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(self.location)], None) + } else { + let func_name = match width { + 128 => "__rust_i128_subo", + _ => unreachable!(), + }; + let (int_result, overflow) = self.operation_with_overflow(func_name, lhs, rhs); + self.llbb().add_assignment(self.location, res, int_result); + overflow + }; let then_block = func.new_block("then"); let after_block = func.new_block("after"); // Return `result_type`'s maximum or minimum value on overflow // NOTE: convert the type to unsigned to have an unsigned shift. 
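// Sketch (plain Rust) of the saturation bound computed below: shifting the
// unsigned reinterpretation of `lhs` right by width - 1 yields 0 for a
// non-negative lhs and 1 for a negative one, so adding INT_MAX produces the
// type's maximum or minimum respectively.
fn saturation_bound_i32_model(lhs: i32) -> i32 {
    let shifted = (lhs as u32) >> 31;
    shifted.wrapping_add(i32::MAX as u32) as i32 // 0x7fffffff or 0x80000000
}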
- let unsigned_type = result_type.to_unsigned(&self.cx); - let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1)); + let unsigned_type = result_type.to_unsigned(self.cx); + let shifted = self.gcc_lshr( + self.gcc_int_cast(lhs, unsigned_type), + self.gcc_int(unsigned_type, width as i64 - 1), + ); let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0)); let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1)); - then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type)); - then_block.end_with_jump(None, after_block); + then_block.add_assignment( + self.location, + res, + self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type), + ); + then_block.end_with_jump(self.location, after_block); - self.llbb().end_with_conditional(None, overflow, then_block, after_block); + self.llbb().end_with_conditional(self.location, overflow, then_block, after_block); // NOTE: since jumps were added in a place rustc does not // expect, the current block in the state need to be updated. self.switch_to_block(after_block); res.to_rvalue() - } - else { + } else { let res = self.gcc_sub(lhs, rhs); let comparison = self.gcc_icmp(IntPredicate::IntULE, res, lhs); let value = self.gcc_neg(self.gcc_int_cast(comparison, result_type)); @@ -1040,21 +1095,26 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } } -fn try_intrinsic<'a, 'b, 'gcc, 'tcx>(bx: &'b mut Builder<'a, 'gcc, 'tcx>, try_func: RValue<'gcc>, data: RValue<'gcc>, _catch_func: RValue<'gcc>, dest: RValue<'gcc>) { +fn try_intrinsic<'a, 'b, 'gcc, 'tcx>( + bx: &'b mut Builder<'a, 'gcc, 'tcx>, + try_func: RValue<'gcc>, + data: RValue<'gcc>, + _catch_func: RValue<'gcc>, + dest: RValue<'gcc>, +) { if bx.sess().panic_strategy() == PanicStrategy::Abort { bx.call(bx.type_void(), None, None, try_func, &[data], None); // Return 0 unconditionally from the intrinsic call; // we can never unwind. let ret_align = bx.tcx.data_layout.i32_align.abi; bx.store(bx.const_i32(0), dest, ret_align); - } - else if wants_msvc_seh(bx.sess()) { - unimplemented!(); - } - else { - #[cfg(feature="master")] + } else { + if wants_msvc_seh(bx.sess()) { + unimplemented!(); + } + #[cfg(feature = "master")] codegen_gnu_try(bx, try_func, data, _catch_func, dest); - #[cfg(not(feature="master"))] + #[cfg(not(feature = "master"))] unimplemented!(); } } @@ -1070,8 +1130,14 @@ fn try_intrinsic<'a, 'b, 'gcc, 'tcx>(bx: &'b mut Builder<'a, 'gcc, 'tcx>, try_fu // function calling it, and that function may already have other personality // functions in play. By calling a shim we're guaranteed that our shim will have // the right personality function. 
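// Under `-C panic=abort`, the `try` intrinsic handled above reduces to a
// plain call, sketched here as a hypothetical plain-Rust model: the result
// slot is unconditionally 0 because unwinding cannot occur.
unsafe fn try_abort_model(try_func: unsafe fn(*mut i8), data: *mut i8) -> i32 {
    unsafe { try_func(data) }; // any unwind would abort instead of returning
    0 // 0 means "no exception was caught"
}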
-#[cfg(feature="master")] -fn codegen_gnu_try<'gcc>(bx: &mut Builder<'_, 'gcc, '_>, try_func: RValue<'gcc>, data: RValue<'gcc>, catch_func: RValue<'gcc>, dest: RValue<'gcc>) { +#[cfg(feature = "master")] +fn codegen_gnu_try<'gcc>( + bx: &mut Builder<'_, 'gcc, '_>, + try_func: RValue<'gcc>, + data: RValue<'gcc>, + catch_func: RValue<'gcc>, + dest: RValue<'gcc>, +) { let cx: &CodegenCx<'gcc, '_> = bx.cx; let (llty, func) = get_rust_try_fn(cx, &mut |mut bx| { // Codegens the shims described above: @@ -1095,7 +1161,7 @@ fn codegen_gnu_try<'gcc>(bx: &mut Builder<'_, 'gcc, '_>, try_func: RValue<'gcc>, let catch_func = func.get_param(2).to_rvalue(); let try_func_ty = bx.type_func(&[bx.type_i8p()], bx.type_void()); - let current_block = bx.block.clone(); + let current_block = bx.block; bx.switch_to_block(then); bx.ret(bx.const_i32(0)); @@ -1130,36 +1196,44 @@ fn codegen_gnu_try<'gcc>(bx: &mut Builder<'_, 'gcc, '_>, try_func: RValue<'gcc>, bx.store(ret, dest, i32_align); } - // Helper function used to get a handle to the `__rust_try` function used to // catch exceptions. // // This function is only generated once and is then cached. -#[cfg(feature="master")] -fn get_rust_try_fn<'a, 'gcc, 'tcx>(cx: &'a CodegenCx<'gcc, 'tcx>, codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>)) -> (Type<'gcc>, Function<'gcc>) { +#[cfg(feature = "master")] +fn get_rust_try_fn<'a, 'gcc, 'tcx>( + cx: &'a CodegenCx<'gcc, 'tcx>, + codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>), +) -> (Type<'gcc>, Function<'gcc>) { if let Some(llfn) = cx.rust_try_fn.get() { return llfn; } // Define the type up front for the signature of the rust_try function. let tcx = cx.tcx; - let i8p = Ty::new_mut_ptr(tcx,tcx.types.i8); + let i8p = Ty::new_mut_ptr(tcx, tcx.types.i8); // `unsafe fn(*mut i8) -> ()` - let try_fn_ty = Ty::new_fn_ptr(tcx,ty::Binder::dummy(tcx.mk_fn_sig( - iter::once(i8p), - Ty::new_unit(tcx,), - false, - rustc_hir::Unsafety::Unsafe, - Abi::Rust, - ))); + let try_fn_ty = Ty::new_fn_ptr( + tcx, + ty::Binder::dummy(tcx.mk_fn_sig( + iter::once(i8p), + Ty::new_unit(tcx), + false, + rustc_hir::Unsafety::Unsafe, + Abi::Rust, + )), + ); // `unsafe fn(*mut i8, *mut i8) -> ()` - let catch_fn_ty = Ty::new_fn_ptr(tcx,ty::Binder::dummy(tcx.mk_fn_sig( - [i8p, i8p].iter().cloned(), - Ty::new_unit(tcx,), - false, - rustc_hir::Unsafety::Unsafe, - Abi::Rust, - ))); + let catch_fn_ty = Ty::new_fn_ptr( + tcx, + ty::Binder::dummy(tcx.mk_fn_sig( + [i8p, i8p].iter().cloned(), + Ty::new_unit(tcx), + false, + rustc_hir::Unsafety::Unsafe, + Abi::Rust, + )), + ); // `unsafe fn(unsafe fn(*mut i8) -> (), *mut i8, unsafe fn(*mut i8, *mut i8) -> ()) -> i32` let rust_fn_sig = ty::Binder::dummy(cx.tcx.mk_fn_sig( [try_fn_ty, i8p, catch_fn_ty], @@ -1175,8 +1249,13 @@ fn get_rust_try_fn<'a, 'gcc, 'tcx>(cx: &'a CodegenCx<'gcc, 'tcx>, codegen: &mut // Helper function to give a Block to a closure to codegen a shim function. // This is currently primarily used for the `try` intrinsic functions above. 
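// For reference, the Rust-level signature assembled above, written out as
// type aliases (a sketch; these alias names do not appear in the source):
type TryFnModel = unsafe fn(*mut i8);
type CatchFnModel = unsafe fn(*mut i8, *mut i8);
type RustTryModel = unsafe fn(TryFnModel, *mut i8, CatchFnModel) -> i32;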
-#[cfg(feature="master")] -fn gen_fn<'a, 'gcc, 'tcx>(cx: &'a CodegenCx<'gcc, 'tcx>, name: &str, rust_fn_sig: ty::PolyFnSig<'tcx>, codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>)) -> (Type<'gcc>, Function<'gcc>) { +#[cfg(feature = "master")] +fn gen_fn<'a, 'gcc, 'tcx>( + cx: &'a CodegenCx<'gcc, 'tcx>, + name: &str, + rust_fn_sig: ty::PolyFnSig<'tcx>, + codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>), +) -> (Type<'gcc>, Function<'gcc>) { let fn_abi = cx.fn_abi_of_fn_ptr(rust_fn_sig, ty::List::empty()); let return_type = fn_abi.gcc_type(cx).return_type; // FIXME(eddyb) find a nicer way to do this. diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs index d8091724d86..e9af34059a0 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs @@ -1,3 +1,5 @@ +use std::iter::FromIterator; + use gccjit::ToRValue; use gccjit::{BinaryOp, RValue, Type}; #[cfg(feature = "master")] @@ -19,6 +21,8 @@ use rustc_span::{sym, Span, Symbol}; use rustc_target::abi::Align; use crate::builder::Builder; +#[cfg(not(feature = "master"))] +use crate::common::SignType; #[cfg(feature = "master")] use crate::context::CodegenCx; @@ -156,6 +160,197 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( return Ok(compare_simd_types(bx, arg1, arg2, in_elem, llret_ty, cmp_op)); } + let simd_bswap = |bx: &mut Builder<'a, 'gcc, 'tcx>, vector: RValue<'gcc>| -> RValue<'gcc> { + let v_type = vector.get_type(); + let vector_type = v_type.unqualified().dyncast_vector().expect("vector type"); + let elem_type = vector_type.get_element_type(); + let elem_size_bytes = elem_type.get_size(); + if elem_size_bytes == 1 { + return vector; + } + + let type_size_bytes = elem_size_bytes as u64 * in_len; + let shuffle_indices = Vec::from_iter(0..type_size_bytes); + let byte_vector_type = bx.context.new_vector_type(bx.type_u8(), type_size_bytes); + let byte_vector = bx.context.new_bitcast(None, args[0].immediate(), byte_vector_type); + + #[cfg(not(feature = "master"))] + let shuffled = { + let new_elements: Vec<_> = shuffle_indices + .chunks_exact(elem_size_bytes as _) + .flat_map(|x| x.iter().rev()) + .map(|&i| { + let index = bx.context.new_rvalue_from_long(bx.u64_type, i as _); + bx.extract_element(byte_vector, index) + }) + .collect(); + + bx.context.new_rvalue_from_vector(None, byte_vector_type, &new_elements) + }; + #[cfg(feature = "master")] + let shuffled = { + let indices: Vec<_> = shuffle_indices + .chunks_exact(elem_size_bytes as _) + .flat_map(|x| x.iter().rev()) + .map(|&i| bx.context.new_rvalue_from_int(bx.u8_type, i as _)) + .collect(); + + let mask = bx.context.new_rvalue_from_vector(None, byte_vector_type, &indices); + bx.context.new_rvalue_vector_perm(None, byte_vector, byte_vector, mask) + }; + bx.context.new_bitcast(None, shuffled, v_type) + }; + + if name == sym::simd_bswap || name == sym::simd_bitreverse { + require!( + bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer, + InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem } + ); + } + + if name == sym::simd_bswap { + return Ok(simd_bswap(bx, args[0].immediate())); + } + + // We use a different algorithm from non-vector bitreverse to take advantage of most + // processors' vector shuffle units. It works like this: + // 1. Generate pre-reversed low and high nibbles as a vector. + // 2. Byte-swap the input. + // 3. Mask off the low and high nibbles of each byte in the byte-swapped input. + // 4. 
Shuffle the pre-reversed low and high-nibbles using the masked nibbles as a shuffle mask. + // 5. Combine the results of the shuffle back together and cast back to the original type. + #[cfg(feature = "master")] + if name == sym::simd_bitreverse { + let vector = args[0].immediate(); + let v_type = vector.get_type(); + let vector_type = v_type.unqualified().dyncast_vector().expect("vector type"); + let elem_type = vector_type.get_element_type(); + let elem_size_bytes = elem_type.get_size(); + + let type_size_bytes = elem_size_bytes as u64 * in_len; + // We need to ensure at least 16 entries in our vector type, since the pre-reversed vectors + // we generate below have 16 entries in them. `new_rvalue_vector_perm` requires the mask + // vector to be of the same length as the source vectors. + let byte_vector_type_size = type_size_bytes.max(16); + + let byte_vector_type = bx.context.new_vector_type(bx.u8_type, type_size_bytes); + let long_byte_vector_type = bx.context.new_vector_type(bx.u8_type, byte_vector_type_size); + + // Step 1: Generate pre-reversed low and high nibbles as a vector. + let zero_byte = bx.context.new_rvalue_zero(bx.u8_type); + let hi_nibble_elements: Vec<_> = (0u8..16) + .map(|x| bx.context.new_rvalue_from_int(bx.u8_type, x.reverse_bits() as _)) + .chain((16..byte_vector_type_size).map(|_| zero_byte)) + .collect(); + let hi_nibble = + bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &hi_nibble_elements); + + let lo_nibble_elements: Vec<_> = (0u8..16) + .map(|x| bx.context.new_rvalue_from_int(bx.u8_type, (x.reverse_bits() >> 4) as _)) + .chain((16..byte_vector_type_size).map(|_| zero_byte)) + .collect(); + let lo_nibble = + bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &lo_nibble_elements); + + let mask = bx.context.new_rvalue_from_vector( + None, + long_byte_vector_type, + &vec![bx.context.new_rvalue_from_int(bx.u8_type, 0x0f); byte_vector_type_size as _], + ); + + let four_vec = bx.context.new_rvalue_from_vector( + None, + long_byte_vector_type, + &vec![bx.context.new_rvalue_from_int(bx.u8_type, 4); byte_vector_type_size as _], + ); + + // Step 2: Byte-swap the input. + let swapped = simd_bswap(bx, args[0].immediate()); + let byte_vector = bx.context.new_bitcast(None, swapped, byte_vector_type); + + // We're going to need to extend the vector with zeros to make sure that the types are the + // same, since that's what new_rvalue_vector_perm expects. + let byte_vector = if byte_vector_type_size > type_size_bytes { + let mut byte_vector_elements = Vec::with_capacity(byte_vector_type_size as _); + for i in 0..type_size_bytes { + let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _); + let val = bx.extract_element(byte_vector, idx); + byte_vector_elements.push(val); + } + for _ in type_size_bytes..byte_vector_type_size { + byte_vector_elements.push(zero_byte); + } + bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &byte_vector_elements) + } else { + bx.context.new_bitcast(None, byte_vector, long_byte_vector_type) + }; + + // Step 3: Mask off the low and high nibbles of each byte in the byte-swapped input. + let masked_hi = (byte_vector >> four_vec) & mask; + let masked_lo = byte_vector & mask; + + // Step 4: Shuffle the pre-reversed low and high-nibbles using the masked nibbles as a shuffle mask. 
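// Scalar model of the table lookup performed by the two shuffles below
// (plain-Rust sketch): each byte is split into nibbles, every nibble is
// replaced by its pre-reversed value from a 16-entry table, and the nibble
// positions are swapped.
fn bitreverse_byte_model(b: u8) -> u8 {
    const REV: [u8; 16] = [
        0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
        0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
    ];
    (REV[(b & 0x0f) as usize] << 4) | REV[(b >> 4) as usize]
}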
+ let hi = bx.context.new_rvalue_vector_perm(None, hi_nibble, hi_nibble, masked_lo); + let lo = bx.context.new_rvalue_vector_perm(None, lo_nibble, lo_nibble, masked_hi); + + // Step 5: Combine the results of the shuffle back together and cast back to the original type. + let result = hi | lo; + let cast_ty = + bx.context.new_vector_type(elem_type, byte_vector_type_size / (elem_size_bytes as u64)); + + // we might need to truncate if sizeof(v_type) < sizeof(cast_type) + if type_size_bytes < byte_vector_type_size { + let cast_result = bx.context.new_bitcast(None, result, cast_ty); + let elems: Vec<_> = (0..in_len) + .map(|i| { + let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _); + bx.extract_element(cast_result, idx) + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(None, v_type, &elems)); + } else { + // avoid the unnecessary truncation as an optimization. + return Ok(bx.context.new_bitcast(None, result, v_type)); + } + } + // since gcc doesn't have vector shuffle methods available in non-patched builds, fallback to + // component-wise bitreverses if they're not available. + #[cfg(not(feature = "master"))] + if name == sym::simd_bitreverse { + let vector = args[0].immediate(); + let vector_ty = vector.get_type(); + let vector_type = vector_ty.unqualified().dyncast_vector().expect("vector type"); + let num_elements = vector_type.get_num_units(); + + let elem_type = vector_type.get_element_type(); + let elem_size_bytes = elem_type.get_size(); + let num_type = elem_type.to_unsigned(bx.cx); + let new_elements: Vec<_> = (0..num_elements) + .map(|idx| { + let index = bx.context.new_rvalue_from_long(num_type, idx as _); + let extracted_value = bx.extract_element(vector, index).to_rvalue(); + bx.bit_reverse(elem_size_bytes as u64 * 8, extracted_value) + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(None, vector_ty, &new_elements)); + } + + if name == sym::simd_ctlz || name == sym::simd_cttz { + let vector = args[0].immediate(); + let elements: Vec<_> = (0..in_len) + .map(|i| { + let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64); + let value = bx.extract_element(vector, index).to_rvalue(); + if name == sym::simd_ctlz { + bx.count_leading_zeroes(value.get_type().get_size() as u64 * 8, value) + } else { + bx.count_trailing_zeroes(value.get_type().get_size() as u64 * 8, value) + } + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(None, vector.get_type(), &elements)); + } + if name == sym::simd_shuffle { // Make sure this is actually an array, since typeck only checks the length-suffixed // version of this intrinsic. 
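// The element-wise `simd_ctlz`/`simd_cttz` fallback above, modeled in plain
// Rust (sketch with a fixed four-lane u32 vector; the real code iterates
// over whatever lane count and element width the input type has):
fn simd_ctlz_model(v: [u32; 4]) -> [u32; 4] {
    let mut out = [0u32; 4];
    for (o, x) in out.iter_mut().zip(v) {
        *o = x.leading_zeros(); // `simd_cttz` would use x.trailing_zeros()
    }
    out
}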
@@ -504,20 +699,15 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
default: RValue<'gcc>,
pointers: RValue<'gcc>,
mask: RValue<'gcc>,
- pointer_count: usize,
bx: &mut Builder<'a, 'gcc, 'tcx>,
in_len: u64,
- underlying_ty: Ty<'tcx>,
invert: bool,
) -> RValue<'gcc> {
- let vector_type = if pointer_count > 1 {
- bx.context.new_vector_type(bx.usize_type, in_len)
- } else {
- vector_ty(bx, underlying_ty, in_len)
- };
- let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();
+ let vector_type = default.get_type();
+ let elem_type =
+ vector_type.unqualified().dyncast_vector().expect("vector type").get_element_type();
- let mut values = vec![];
+ let mut values = Vec::with_capacity(in_len as usize);
for i in 0..in_len {
let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
let int = bx.context.new_vector_access(None, pointers, index).to_rvalue();
@@ -530,13 +720,15 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
let vector = bx.context.new_rvalue_from_vector(None, vector_type, &values);
- let mut mask_types = vec![];
- let mut mask_values = vec![];
+ let mut mask_types = Vec::with_capacity(in_len as usize);
+ let mut mask_values = Vec::with_capacity(in_len as usize);
for i in 0..in_len {
let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
mask_types.push(bx.context.new_field(None, bx.i32_type, "m"));
let mask_value = bx.context.new_vector_access(None, mask, index).to_rvalue();
- let masked = bx.context.new_rvalue_from_int(bx.i32_type, in_len as i32) & mask_value;
+ let mask_value_cast = bx.context.new_cast(None, mask_value, bx.i32_type);
+ let masked =
+ bx.context.new_rvalue_from_int(bx.i32_type, in_len as i32) & mask_value_cast;
let value = index + masked;
mask_values.push(value);
}
@@ -665,10 +857,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
args[0].immediate(),
args[1].immediate(),
args[2].immediate(),
- pointer_count,
bx,
in_len,
- underlying_ty,
false,
));
}
@@ -779,16 +969,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
}
}
- let result = gather(
- args[0].immediate(),
- args[1].immediate(),
- args[2].immediate(),
- pointer_count,
- bx,
- in_len,
- underlying_ty,
- true,
- );
+ let result =
+ gather(args[0].immediate(), args[1].immediate(), args[2].immediate(), bx, in_len, true);
let pointers = args[1].immediate();
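// Semantic model (scalar plain-Rust sketch) of the masked gather built in
// the hunk above: lane i loads through pointers[i] when mask[i] is set,
// otherwise the lane keeps its value from `default` (the real code can also
// invert the mask sense via the `invert` flag).
unsafe fn gather_model<T: Copy>(
    default: &[T],
    pointers: &[*const T],
    mask: &[bool],
) -> Vec<T> {
    default
        .iter()
        .zip(pointers)
        .zip(mask)
        .map(|((&d, &p), &m)| if m { unsafe { *p } } else { d })
        .collect()
}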
