diff options
| -rw-r--r-- | src/builder.rs | 104 | ||||
| -rw-r--r-- | src/context.rs | 14 | ||||
| -rw-r--r-- | src/declare.rs | 5 | ||||
| -rw-r--r-- | src/intrinsic/archs.rs | 5 | ||||
| -rw-r--r-- | src/intrinsic/llvm.rs | 18 |
5 files changed, 141 insertions, 5 deletions
diff --git a/src/builder.rs b/src/builder.rs index a53115c578b..82b0e64e582 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -217,11 +217,27 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { return Cow::Borrowed(args); } + let func_name = format!("{:?}", func_ptr); + let casted_args: Vec<_> = param_types .into_iter() .zip(args.iter()) .enumerate() .map(|(index, (expected_ty, &actual_val))| { + // NOTE: these intrinsics have missing parameters before the last one, so ignore the + // last argument type check. + // FIXME(antoyo): find a way to refactor in order to avoid this hack. + match &*func_name { + "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask" + | "__builtin_ia32_sqrtpd512_mask" => { + if index == args.len() - 1 { + return actual_val; + } + }, + _ => (), + } + let actual_ty = actual_val.get_type(); if expected_ty != actual_ty { if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() { @@ -286,7 +302,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> { - let args = self.check_ptr_call("call", func_ptr, args); + let mut args = self.check_ptr_call("call", func_ptr, args); // gccjit requires to use the result of functions, even when it's not used. // That's why we assign the result to a local or call add_eval(). @@ -298,6 +314,92 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { if return_type != void_type { unsafe { RETURN_VALUE_COUNT += 1 }; let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT })); + // Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing + // arguments here. + if gcc_func.get_param_count() != args.len() { + let func_name = format!("{:?}", func_ptr); + match &*func_name { + "__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask" + // FIXME(antoyo): the following intrinsics has 4 (or 5) arguments according to the doc, but is defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs. + | "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask" + | "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask" + | "__builtin_ia32_pmaxuq128_mask" + | "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask" + | "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" + | "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask" + | "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" + => { + // TODO: refactor by separating those intrinsics outside of this branch. + let add_before_last_arg = + match &*func_name { + "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" + | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true, + _ => false, + }; + let new_first_arg_is_zero = + match &*func_name { + "__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask" + | "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true, + _ => false + }; + let arg3_index = + match &*func_name { + "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1, + _ => 2, + }; + let mut new_args = args.to_vec(); + let arg3_type = gcc_func.get_param_type(arg3_index); + let first_arg = + if new_first_arg_is_zero { + let vector_type = arg3_type.dyncast_vector().expect("vector type"); + let zero = self.context.new_rvalue_zero(vector_type.get_element_type()); + let num_units = vector_type.get_num_units(); + self.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units]) + } + else { + self.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue() + }; + if add_before_last_arg { + new_args.insert(new_args.len() - 1, first_arg); + } + else { + new_args.push(first_arg); + } + let arg4_index = + match &*func_name { + "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2, + _ => 3, + }; + let arg4_type = gcc_func.get_param_type(arg4_index); + let minus_one = self.context.new_rvalue_from_int(arg4_type, -1); + if add_before_last_arg { + new_args.insert(new_args.len() - 1, minus_one); + } + else { + new_args.push(minus_one); + } + args = new_args.into(); + }, + "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => { + let mut new_args = args.to_vec(); + if args.len() == 3 { + // Both llvm.fma.v16f32 and llvm.x86.avx512.vfmaddsub.ps.512 maps to + // the same GCC intrinsic, but the former has 3 parameters and the + // latter has 4 so it doesn't require this additional argument. + let arg4_type = gcc_func.get_param_type(3); + let minus_one = self.context.new_rvalue_from_int(arg4_type, -1); + new_args.push(minus_one); + } + + let arg5_type = gcc_func.get_param_type(4); + new_args.push(self.context.new_rvalue_from_int(arg5_type, 4)); + args = new_args.into(); + }, + _ => (), + } + } self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args)); result.to_rvalue() } diff --git a/src/context.rs b/src/context.rs index 92b30ef9b4d..4bc8c5a6760 100644 --- a/src/context.rs +++ b/src/context.rs @@ -35,6 +35,7 @@ pub struct CodegenCx<'gcc, 'tcx> { pub normal_function_addresses: RefCell<FxHashSet<RValue<'gcc>>>, pub functions: RefCell<FxHashMap<String, Function<'gcc>>>, + pub intrinsics: RefCell<FxHashMap<String, Function<'gcc>>>, pub tls_model: gccjit::TlsModel, @@ -184,6 +185,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { current_func: RefCell::new(None), normal_function_addresses: Default::default(), functions: RefCell::new(functions), + intrinsics: RefCell::new(FxHashMap::default()), tls_model, @@ -315,8 +317,16 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> { } fn get_fn_addr(&self, instance: Instance<'tcx>) -> RValue<'gcc> { - let func = get_fn(self, instance); - let func = self.rvalue_as_function(func); + let func_name = self.tcx.symbol_name(instance).name; + + let func = + if self.intrinsics.borrow().contains_key(func_name) { + self.intrinsics.borrow()[func_name].clone() + } + else { + let func = get_fn(self, instance); + self.rvalue_as_function(func) + }; let ptr = func.get_address(None); // TODO(antoyo): don't do this twice: i.e. in declare_fn and here. diff --git a/src/declare.rs b/src/declare.rs index 43017376916..8b2146c5aa8 100644 --- a/src/declare.rs +++ b/src/declare.rs @@ -11,6 +11,7 @@ use crate::intrinsic::llvm; impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { pub fn get_or_insert_global(&self, name: &str, ty: Type<'gcc>, is_tls: bool, link_section: Option<Symbol>) -> LValue<'gcc> { if self.globals.borrow().contains_key(name) { + // TODO: use [] instead of .get().expect()? let typ = self.globals.borrow().get(name).expect("global").get_type(); let global = self.context.new_global(None, GlobalKind::Imported, typ, name); if is_tls { @@ -103,7 +104,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { /// update the declaration and return existing Value instead. fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*llvm::CallConv*/, return_type: Type<'gcc>, param_types: &[Type<'gcc>], variadic: bool) -> Function<'gcc> { if name.starts_with("llvm.") { - return llvm::intrinsic(name, cx); + let intrinsic = llvm::intrinsic(name, cx); + cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic); + return intrinsic; } let func = if cx.functions.borrow().contains_key(name) { diff --git a/src/intrinsic/archs.rs b/src/intrinsic/archs.rs index bfeb30f2913..fbcfc8be859 100644 --- a/src/intrinsic/archs.rs +++ b/src/intrinsic/archs.rs @@ -4275,5 +4275,8 @@ match name { "llvm.xcore.getid" => "__builtin_getid", "llvm.xcore.getps" => "__builtin_getps", "llvm.xcore.setps" => "__builtin_setps", - _ => unimplemented!("***** unsupported LLVM intrinsic {}", name), + _ => { + println!("***** unsupported LLVM intrinsic {}", name); + "" + }, } diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index aab93b92755..16f0df8a207 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -21,6 +21,24 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function "llvm.x86.xgetbv" => "__builtin_ia32_xgetbv", // NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html "llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd", + "llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask", + "llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask", + "llvm.x86.avx512.mask.pmaxs.q.256" => "__builtin_ia32_pmaxsq256_mask", + "llvm.x86.avx512.mask.pmaxs.q.128" => "__builtin_ia32_pmaxsq128_mask", + "llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask", + "llvm.x86.avx512.max.pd.512" => "__builtin_ia32_maxpd512_mask", + "llvm.x86.avx512.mask.pmaxu.q.256" => "__builtin_ia32_pmaxuq256_mask", + "llvm.x86.avx512.mask.pmaxu.q.128" => "__builtin_ia32_pmaxuq128_mask", + "llvm.x86.avx512.mask.pmins.q.256" => "__builtin_ia32_pminsq256_mask", + "llvm.x86.avx512.mask.pmins.q.128" => "__builtin_ia32_pminsq128_mask", + "llvm.x86.avx512.min.ps.512" => "__builtin_ia32_minps512_mask", + "llvm.x86.avx512.min.pd.512" => "__builtin_ia32_minpd512_mask", + "llvm.x86.avx512.mask.pminu.q.256" => "__builtin_ia32_pminuq256_mask", + "llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask", + "llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask", + "llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask", + "llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddps512_mask", + "llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddpd512_mask", // The above doc points to unknown builtins for the following, so override them: "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si", |
