about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAntoni Boucher <bouanto@zoho.com>2022-06-05 13:36:17 -0400
committerAntoni Boucher <bouanto@zoho.com>2022-06-06 22:08:07 -0400
commite5a1bb2f59c49a6a741b8ee5b20ad16b93f314c6 (patch)
tree559ccd4209ba5ad6373958045e70b536757b2779
parentfe606eb4449e07e21959a52ae07ca12be3955209 (diff)
downloadrust-e5a1bb2f59c49a6a741b8ee5b20ad16b93f314c6.tar.gz
rust-e5a1bb2f59c49a6a741b8ee5b20ad16b93f314c6.zip
Add more SIMD
-rw-r--r--src/builder.rs4
-rw-r--r--src/intrinsic/llvm.rs105
2 files changed, 99 insertions, 10 deletions
diff --git a/src/builder.rs b/src/builder.rs
index aefd4eecc0a..3e1f56c183a 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -277,7 +277,9 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
         let gcc_func = func_ptr.get_type().dyncast_function_ptr_type().expect("function ptr");
         let func_name = format!("{:?}", func_ptr);
+        let previous_arg_count = args.len();
         let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args.into(), &func_name);
+        let args_adjusted = args.len() != previous_arg_count;
         let args = self.check_ptr_call("call", func_ptr, &*args);
 
         // gccjit requires to use the result of functions, even when it's not used.
@@ -289,7 +291,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         if return_type != void_type {
             unsafe { RETURN_VALUE_COUNT += 1 };
             let return_value = self.cx.context.new_call_through_ptr(None, func_ptr, &args);
-            let return_value = llvm::adjust_intrinsic_return_value(&self, return_value, &func_name, &args);
+            let return_value = llvm::adjust_intrinsic_return_value(&self, return_value, &func_name, &args, args_adjusted);
             let result = current_func.new_local(None, return_value.get_type(), &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
             self.block.add_assignment(None, result, return_value);
             result.to_rvalue()
diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs
index 29387712dae..af6c121c337 100644
--- a/src/intrinsic/llvm.rs
+++ b/src/intrinsic/llvm.rs
@@ -53,6 +53,15 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
                 | "__builtin_ia32_pmaxuw512_mask" | "__builtin_ia32_pmaxub512_mask" | "__builtin_ia32_pmaxsw512_mask"
                 | "__builtin_ia32_pmaxsb512_mask" | "__builtin_ia32_pminuw512_mask" | "__builtin_ia32_pminub512_mask"
                 | "__builtin_ia32_pminsw512_mask" | "__builtin_ia32_pminsb512_mask"
+                | "__builtin_ia32_pmaddwd512_mask" | "__builtin_ia32_pmaddubsw512_mask" | "__builtin_ia32_packssdw512_mask"
+                | "__builtin_ia32_packsswb512_mask" | "__builtin_ia32_packusdw512_mask" | "__builtin_ia32_packuswb512_mask"
+                | "__builtin_ia32_pavgw512_mask" | "__builtin_ia32_pavgb512_mask" | "__builtin_ia32_psllw512_mask"
+                | "__builtin_ia32_psllwi512_mask" | "__builtin_ia32_psllv32hi_mask" | "__builtin_ia32_psrlw512_mask"
+                | "__builtin_ia32_psrlwi512_mask" | "__builtin_ia32_psllv16hi_mask" | "__builtin_ia32_psllv8hi_mask"
+                | "__builtin_ia32_psrlv32hi_mask" | "__builtin_ia32_psraw512_mask" | "__builtin_ia32_psrawi512_mask"
+                | "__builtin_ia32_psrlv16hi_mask" | "__builtin_ia32_psrlv8hi_mask" | "__builtin_ia32_psrav32hi_mask"
+                | "__builtin_ia32_permvarhi512_mask" | "__builtin_ia32_pshufb512_mask" | "__builtin_ia32_psrav16hi_mask"
+                | "__builtin_ia32_psrav8hi_mask" | "__builtin_ia32_permvarhi256_mask" | "__builtin_ia32_permvarhi128_mask"
                 => {
                 let mut new_args = args.to_vec();
                 let arg3_type = gcc_func.get_param_type(2);
@@ -66,6 +75,19 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
                 new_args.push(minus_one);
                 args = new_args.into();
             },
+            "__builtin_ia32_dbpsadbw512_mask" | "__builtin_ia32_dbpsadbw256_mask" | "__builtin_ia32_dbpsadbw128_mask" => {
+                let mut new_args = args.to_vec();
+                let arg4_type = gcc_func.get_param_type(3);
+                let vector_type = arg4_type.dyncast_vector().expect("vector type");
+                let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
+                let num_units = vector_type.get_num_units();
+                let first_arg = builder.context.new_rvalue_from_vector(None, arg4_type, &vec![zero; num_units]);
+                new_args.push(first_arg);
+                let arg5_type = gcc_func.get_param_type(4);
+                let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
+                new_args.push(minus_one);
+                args = new_args.into();
+            },
             "__builtin_ia32_vplzcntd_512_mask" | "__builtin_ia32_vplzcntd_256_mask" | "__builtin_ia32_vplzcntd_128_mask"
                 | "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask" => {
                 let mut new_args = args.to_vec();
@@ -186,7 +208,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
             "__builtin_ia32_stmxcsr" => {
                 args = vec![].into();
             },
-            "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" => {
+            "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => {
                 let mut new_args = args.to_vec();
                 let arg2_type = gcc_func.get_param_type(1);
                 let variable = builder.current_func().new_local(None, arg2_type, "addcarryResult");
@@ -194,12 +216,22 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
                 args = new_args.into();
             },
             "__builtin_ia32_vpermt2varqi512_mask" | "__builtin_ia32_vpermt2varqi256_mask"
-                | "__builtin_ia32_vpermt2varqi128_mask" => {
+                | "__builtin_ia32_vpermt2varqi128_mask" | "__builtin_ia32_vpermt2varhi512_mask"
+                | "__builtin_ia32_vpermt2varhi256_mask" | "__builtin_ia32_vpermt2varhi128_mask"
+                => {
                 let new_args = args.to_vec();
                 let arg4_type = gcc_func.get_param_type(3);
                 let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
                 args = vec![new_args[1], new_args[0], new_args[2], minus_one].into();
             },
+            "__builtin_ia32_xrstor" | "__builtin_ia32_xsavec" => {
+                let new_args = args.to_vec();
+                let thirty_two = builder.context.new_rvalue_from_int(new_args[1].get_type(), 32);
+                let arg2 = new_args[1] << thirty_two | new_args[2];
+                let arg2_type = gcc_func.get_param_type(1);
+                let arg2 = builder.context.new_cast(None, arg2, arg2_type);
+                args = vec![new_args[0], arg2].into();
+            },
             _ => (),
         }
     }
@@ -249,18 +281,24 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
     args
 }
 
-pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, mut return_value: RValue<'gcc>, func_name: &str, args: &[RValue<'gcc>]) -> RValue<'gcc> {
+pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, mut return_value: RValue<'gcc>, func_name: &str, args: &[RValue<'gcc>], args_adjusted: bool) -> RValue<'gcc> {
     match func_name {
         "__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
             let zero = builder.context.new_rvalue_zero(builder.int_type);
             return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
         },
-        "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" => {
-            let last_arg = args.last().expect("last arg");
-            let field1 = builder.context.new_field(None, builder.u8_type, "carryFlag");
-            let field2 = builder.context.new_field(None, builder.ulonglong_type, "carryResult");
-            let struct_type = builder.context.new_struct_type(None, "addcarryResult", &[field1, field2]);
-            return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[return_value, last_arg.dereference(None).to_rvalue()]);
+        "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => {
+            // Both llvm.x86.addcarry.32 and llvm.x86.addcarryx.u32 points to the same GCC builtin,
+            // but only the former requires adjusting the return value.
+            // Those 2 LLVM intrinsics differ by their argument count, that's why we check if the
+            // arguments were adjusted.
+            if args_adjusted {
+                let last_arg = args.last().expect("last arg");
+                let field1 = builder.context.new_field(None, builder.u8_type, "carryFlag");
+                let field2 = builder.context.new_field(None, args[1].get_type(), "carryResult");
+                let struct_type = builder.context.new_struct_type(None, "addcarryResult", &[field1, field2]);
+                return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[return_value, last_arg.dereference(None).to_rvalue()]);
+            }
         },
         _ => (),
     }
@@ -450,6 +488,22 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
         "llvm.x86.avx512.mask.vpshufbitqmb.512" => "__builtin_ia32_vpshufbitqmb512_mask",
         "llvm.x86.avx512.mask.vpshufbitqmb.256" => "__builtin_ia32_vpshufbitqmb256_mask",
         "llvm.x86.avx512.mask.vpshufbitqmb.128" => "__builtin_ia32_vpshufbitqmb128_mask",
+        "llvm.x86.avx512.mask.ucmp.w.512" => "__builtin_ia32_ucmpw512_mask",
+        "llvm.x86.avx512.mask.ucmp.w.256" => "__builtin_ia32_ucmpw256_mask",
+        "llvm.x86.avx512.mask.ucmp.w.128" => "__builtin_ia32_ucmpw128_mask",
+        "llvm.x86.avx512.mask.ucmp.b.512" => "__builtin_ia32_ucmpb512_mask",
+        "llvm.x86.avx512.mask.ucmp.b.256" => "__builtin_ia32_ucmpb256_mask",
+        "llvm.x86.avx512.mask.ucmp.b.128" => "__builtin_ia32_ucmpb128_mask",
+        "llvm.x86.avx512.mask.cmp.w.512" => "__builtin_ia32_cmpw512_mask",
+        "llvm.x86.avx512.mask.cmp.w.256" => "__builtin_ia32_cmpw256_mask",
+        "llvm.x86.avx512.mask.cmp.w.128" => "__builtin_ia32_cmpw128_mask",
+        "llvm.x86.avx512.mask.cmp.b.512" => "__builtin_ia32_cmpb512_mask",
+        "llvm.x86.avx512.mask.cmp.b.256" => "__builtin_ia32_cmpb256_mask",
+        "llvm.x86.avx512.mask.cmp.b.128" => "__builtin_ia32_cmpb128_mask",
+        "llvm.x86.xrstor" => "__builtin_ia32_xrstor",
+        "llvm.x86.xsavec" => "__builtin_ia32_xsavec",
+        "llvm.x86.addcarry.32" => "__builtin_ia32_addcarryx_u32",
+        "llvm.x86.subborrow.32" => "__builtin_ia32_sbb_u32",
 
         // The above doc points to unknown builtins for the following, so override them:
         "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
@@ -543,6 +597,39 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
         "llvm.x86.avx512.pmulhu.w.512" => "__builtin_ia32_pmulhuw512_mask",
         "llvm.x86.avx512.pmulh.w.512" => "__builtin_ia32_pmulhw512_mask",
         "llvm.x86.avx512.pmul.hr.sw.512" => "__builtin_ia32_pmulhrsw512_mask",
+        "llvm.x86.avx512.pmaddw.d.512" => "__builtin_ia32_pmaddwd512_mask",
+        "llvm.x86.avx512.pmaddubs.w.512" => "__builtin_ia32_pmaddubsw512_mask",
+        "llvm.x86.avx512.packssdw.512" => "__builtin_ia32_packssdw512_mask",
+        "llvm.x86.avx512.packsswb.512" => "__builtin_ia32_packsswb512_mask",
+        "llvm.x86.avx512.packusdw.512" => "__builtin_ia32_packusdw512_mask",
+        "llvm.x86.avx512.packuswb.512" => "__builtin_ia32_packuswb512_mask",
+        "llvm.x86.avx512.pavg.w.512" => "__builtin_ia32_pavgw512_mask",
+        "llvm.x86.avx512.pavg.b.512" => "__builtin_ia32_pavgb512_mask",
+        "llvm.x86.avx512.psll.w.512" => "__builtin_ia32_psllw512_mask",
+        "llvm.x86.avx512.pslli.w.512" => "__builtin_ia32_psllwi512_mask",
+        "llvm.x86.avx512.psllv.w.512" => "__builtin_ia32_psllv32hi_mask",
+        "llvm.x86.avx512.psllv.w.256" => "__builtin_ia32_psllv16hi_mask",
+        "llvm.x86.avx512.psllv.w.128" => "__builtin_ia32_psllv8hi_mask",
+        "llvm.x86.avx512.psrl.w.512" => "__builtin_ia32_psrlw512_mask",
+        "llvm.x86.avx512.psrli.w.512" => "__builtin_ia32_psrlwi512_mask",
+        "llvm.x86.avx512.psrlv.w.512" => "__builtin_ia32_psrlv32hi_mask",
+        "llvm.x86.avx512.psrlv.w.256" => "__builtin_ia32_psrlv16hi_mask",
+        "llvm.x86.avx512.psrlv.w.128" => "__builtin_ia32_psrlv8hi_mask",
+        "llvm.x86.avx512.psra.w.512" => "__builtin_ia32_psraw512_mask",
+        "llvm.x86.avx512.psrai.w.512" => "__builtin_ia32_psrawi512_mask",
+        "llvm.x86.avx512.psrav.w.512" => "__builtin_ia32_psrav32hi_mask",
+        "llvm.x86.avx512.psrav.w.256" => "__builtin_ia32_psrav16hi_mask",
+        "llvm.x86.avx512.psrav.w.128" => "__builtin_ia32_psrav8hi_mask",
+        "llvm.x86.avx512.vpermi2var.hi.512" => "__builtin_ia32_vpermt2varhi512_mask",
+        "llvm.x86.avx512.vpermi2var.hi.256" => "__builtin_ia32_vpermt2varhi256_mask",
+        "llvm.x86.avx512.vpermi2var.hi.128" => "__builtin_ia32_vpermt2varhi128_mask",
+        "llvm.x86.avx512.permvar.hi.512" => "__builtin_ia32_permvarhi512_mask",
+        "llvm.x86.avx512.permvar.hi.256" => "__builtin_ia32_permvarhi256_mask",
+        "llvm.x86.avx512.permvar.hi.128" => "__builtin_ia32_permvarhi128_mask",
+        "llvm.x86.avx512.pshuf.b.512" => "__builtin_ia32_pshufb512_mask",
+        "llvm.x86.avx512.dbpsadbw.512" => "__builtin_ia32_dbpsadbw512_mask",
+        "llvm.x86.avx512.dbpsadbw.256" => "__builtin_ia32_dbpsadbw256_mask",
+        "llvm.x86.avx512.dbpsadbw.128" => "__builtin_ia32_dbpsadbw128_mask",
 
         // NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
         _ => include!("archs.rs"),