about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/builder.rs 104
-rw-r--r-- src/context.rs 14
-rw-r--r-- src/declare.rs 5
-rw-r--r-- src/intrinsic/archs.rs 5
-rw-r--r-- src/intrinsic/llvm.rs 18
5 files changed, 141 insertions, 5 deletions
diff --git a/src/builder.rs b/src/builder.rs
index a53115c578b..82b0e64e582 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -217,11 +217,27 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             return Cow::Borrowed(args);
         }
 
+        let func_name = format!("{:?}", func_ptr);
+
         let casted_args: Vec<_> = param_types
             .into_iter()
             .zip(args.iter())
             .enumerate()
             .map(|(index, (expected_ty, &actual_val))| {
+                // NOTE: these intrinsics are called without some of the parameters that precede the
+                // last one, so skip the type check for the last argument.
+                // FIXME(antoyo): find a way to refactor in order to avoid this hack.
+                match &*func_name {
+                    "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
+                    | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
+                    | "__builtin_ia32_sqrtpd512_mask" => {
+                        if index == args.len() - 1 {
+                            return actual_val;
+                        }
+                    },
+                    _ => (),
+                }
+
                 let actual_ty = actual_val.get_type();
                 if expected_ty != actual_ty {
                     if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
@@ -286,7 +302,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     }
 
     fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
-        let args = self.check_ptr_call("call", func_ptr, args);
+        let mut args = self.check_ptr_call("call", func_ptr, args);
 
         // gccjit requires to use the result of functions, even when it's not used.
         // That's why we assign the result to a local or call add_eval().
@@ -298,6 +314,92 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         if return_type != void_type {
             unsafe { RETURN_VALUE_COUNT += 1 };
             let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
+            // Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
+            // arguments here.
+            if gcc_func.get_param_count() != args.len() {
+                let func_name = format!("{:?}", func_ptr);
+                match &*func_name {
+                    "__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
+                    // FIXME(antoyo): the following intrinsics have 4 (or 5) arguments according to the doc, but are defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
+                    | "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
+                    | "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
+                    | "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
+                    | "__builtin_ia32_pmaxuq128_mask"
+                    | "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
+                    | "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
+                    | "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
+                    | "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
+                    => {
+                        // TODO: refactor by separating those intrinsics outside of this branch.
+                        let add_before_last_arg =
+                            match &*func_name {
+                                "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
+                                | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
+                                | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
+                                _ => false,
+                            };
+                        let new_first_arg_is_zero =
+                            match &*func_name {
+                                "__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
+                                | "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
+                                _ => false
+                            };
+                        let arg3_index =
+                            match &*func_name {
+                                "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
+                                _ => 2,
+                            };
+                        let mut new_args = args.to_vec();
+                        let arg3_type = gcc_func.get_param_type(arg3_index);
+                        let first_arg =
+                            if new_first_arg_is_zero {
+                                let vector_type = arg3_type.dyncast_vector().expect("vector type");
+                                let zero = self.context.new_rvalue_zero(vector_type.get_element_type());
+                                let num_units = vector_type.get_num_units();
+                                self.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
+                            }
+                            else {
+                                self.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
+                            };
+                        if add_before_last_arg {
+                            new_args.insert(new_args.len() - 1, first_arg);
+                        }
+                        else {
+                            new_args.push(first_arg);
+                        }
+                        let arg4_index =
+                            match &*func_name {
+                                "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
+                                _ => 3,
+                            };
+                        let arg4_type = gcc_func.get_param_type(arg4_index);
+                        let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
+                        if add_before_last_arg {
+                            new_args.insert(new_args.len() - 1, minus_one);
+                        }
+                        else {
+                            new_args.push(minus_one);
+                        }
+                        args = new_args.into();
+                    },
+                    "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
+                        let mut new_args = args.to_vec();
+                        if args.len() == 3 {
+                            // Both llvm.fma.v16f32 and llvm.x86.avx512.vfmaddsub.ps.512 map to
+                            // the same GCC intrinsic, but the former has 3 parameters and the
+                            // latter has 4, so only the former requires this additional argument.
+                            let arg4_type = gcc_func.get_param_type(3);
+                            let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
+                            new_args.push(minus_one);
+                        }
+
+                        let arg5_type = gcc_func.get_param_type(4);
+                        new_args.push(self.context.new_rvalue_from_int(arg5_type, 4));
+                        args = new_args.into();
+                    },
+                    _ => (),
+                }
+            }
             self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
             result.to_rvalue()
         }
diff --git a/src/context.rs b/src/context.rs
index 92b30ef9b4d..4bc8c5a6760 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -35,6 +35,7 @@ pub struct CodegenCx<'gcc, 'tcx> {
     pub normal_function_addresses: RefCell<FxHashSet<RValue<'gcc>>>,
 
     pub functions: RefCell<FxHashMap<String, Function<'gcc>>>,
+    pub intrinsics: RefCell<FxHashMap<String, Function<'gcc>>>,
 
     pub tls_model: gccjit::TlsModel,
 
@@ -184,6 +185,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             current_func: RefCell::new(None),
             normal_function_addresses: Default::default(),
             functions: RefCell::new(functions),
+            intrinsics: RefCell::new(FxHashMap::default()),
 
             tls_model,
 
@@ -315,8 +317,16 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
     }
 
     fn get_fn_addr(&self, instance: Instance<'tcx>) -> RValue<'gcc> {
-        let func = get_fn(self, instance);
-        let func = self.rvalue_as_function(func);
+        let func_name = self.tcx.symbol_name(instance).name;
+
+        let func =
+            if self.intrinsics.borrow().contains_key(func_name) {
+                self.intrinsics.borrow()[func_name].clone()
+            }
+            else {
+                let func = get_fn(self, instance);
+                self.rvalue_as_function(func)
+            };
         let ptr = func.get_address(None);
 
         // TODO(antoyo): don't do this twice: i.e. in declare_fn and here.
diff --git a/src/declare.rs b/src/declare.rs
index 43017376916..8b2146c5aa8 100644
--- a/src/declare.rs
+++ b/src/declare.rs
@@ -11,6 +11,7 @@ use crate::intrinsic::llvm;
 impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
     pub fn get_or_insert_global(&self, name: &str, ty: Type<'gcc>, is_tls: bool, link_section: Option<Symbol>) -> LValue<'gcc> {
         if self.globals.borrow().contains_key(name) {
+            // TODO: use [] instead of .get().expect()?
             let typ = self.globals.borrow().get(name).expect("global").get_type();
             let global = self.context.new_global(None, GlobalKind::Imported, typ, name);
             if is_tls {
@@ -103,7 +104,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
 /// update the declaration and return existing Value instead.
 fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*llvm::CallConv*/, return_type: Type<'gcc>, param_types: &[Type<'gcc>], variadic: bool) -> Function<'gcc> {
     if name.starts_with("llvm.") {
-        return llvm::intrinsic(name, cx);
+        let intrinsic = llvm::intrinsic(name, cx);
+        cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic);
+        return intrinsic;
     }
     let func =
         if cx.functions.borrow().contains_key(name) {
diff --git a/src/intrinsic/archs.rs b/src/intrinsic/archs.rs
index bfeb30f2913..fbcfc8be859 100644
--- a/src/intrinsic/archs.rs
+++ b/src/intrinsic/archs.rs
@@ -4275,5 +4275,8 @@ match name {
     "llvm.xcore.getid" => "__builtin_getid",
     "llvm.xcore.getps" => "__builtin_getps",
     "llvm.xcore.setps" => "__builtin_setps",
-    _ => unimplemented!("***** unsupported LLVM intrinsic {}", name),
+    _ => {
+        println!("***** unsupported LLVM intrinsic {}", name);
+        ""
+    },
 }
diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs
index aab93b92755..16f0df8a207 100644
--- a/src/intrinsic/llvm.rs
+++ b/src/intrinsic/llvm.rs
@@ -21,6 +21,24 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
         "llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
         // NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
         "llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
+        "llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask",
+        "llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask",
+        "llvm.x86.avx512.mask.pmaxs.q.256" => "__builtin_ia32_pmaxsq256_mask",
+        "llvm.x86.avx512.mask.pmaxs.q.128" => "__builtin_ia32_pmaxsq128_mask",
+        "llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask",
+        "llvm.x86.avx512.max.pd.512" => "__builtin_ia32_maxpd512_mask",
+        "llvm.x86.avx512.mask.pmaxu.q.256" => "__builtin_ia32_pmaxuq256_mask",
+        "llvm.x86.avx512.mask.pmaxu.q.128" => "__builtin_ia32_pmaxuq128_mask",
+        "llvm.x86.avx512.mask.pmins.q.256" => "__builtin_ia32_pminsq256_mask",
+        "llvm.x86.avx512.mask.pmins.q.128" => "__builtin_ia32_pminsq128_mask",
+        "llvm.x86.avx512.min.ps.512" => "__builtin_ia32_minps512_mask",
+        "llvm.x86.avx512.min.pd.512" => "__builtin_ia32_minpd512_mask",
+        "llvm.x86.avx512.mask.pminu.q.256" => "__builtin_ia32_pminuq256_mask",
+        "llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask",
+        "llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask",
+        "llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask",
+        "llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddps512_mask",
+        "llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
 
         // The above doc points to unknown builtins for the following, so override them:
         "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",