Diffstat (limited to 'compiler/rustc_codegen_gcc/src/intrinsic')
 -rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/archs.rs |   27
 -rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs  |  524
 -rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/mod.rs   | 1101
 -rw-r--r--  compiler/rustc_codegen_gcc/src/intrinsic/simd.rs  |  230
4 files changed, 1164 insertions(+), 718 deletions(-)
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
index 15d67385c3e..c4ae1751fa0 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
@@ -151,8 +151,10 @@ match name {
     "llvm.amdgcn.msad.u8" => "__builtin_amdgcn_msad_u8",
     "llvm.amdgcn.perm" => "__builtin_amdgcn_perm",
     "llvm.amdgcn.permlane16" => "__builtin_amdgcn_permlane16",
+    "llvm.amdgcn.permlane16.var" => "__builtin_amdgcn_permlane16_var",
     "llvm.amdgcn.permlane64" => "__builtin_amdgcn_permlane64",
     "llvm.amdgcn.permlanex16" => "__builtin_amdgcn_permlanex16",
+    "llvm.amdgcn.permlanex16.var" => "__builtin_amdgcn_permlanex16_var",
     "llvm.amdgcn.qsad.pk.u16.u8" => "__builtin_amdgcn_qsad_pk_u16_u8",
     "llvm.amdgcn.queue.ptr" => "__builtin_amdgcn_queue_ptr",
     "llvm.amdgcn.rcp.legacy" => "__builtin_amdgcn_rcp_legacy",
@@ -160,11 +162,20 @@ match name {
     "llvm.amdgcn.readlane" => "__builtin_amdgcn_readlane",
     "llvm.amdgcn.rsq.legacy" => "__builtin_amdgcn_rsq_legacy",
     "llvm.amdgcn.s.barrier" => "__builtin_amdgcn_s_barrier",
+    "llvm.amdgcn.s.barrier.init" => "__builtin_amdgcn_s_barrier_init",
+    "llvm.amdgcn.s.barrier.join" => "__builtin_amdgcn_s_barrier_join",
+    "llvm.amdgcn.s.barrier.leave" => "__builtin_amdgcn_s_barrier_leave",
+    "llvm.amdgcn.s.barrier.signal" => "__builtin_amdgcn_s_barrier_signal",
+    "llvm.amdgcn.s.barrier.signal.isfirst" => "__builtin_amdgcn_s_barrier_signal_isfirst",
+    "llvm.amdgcn.s.barrier.signal.isfirst.var" => "__builtin_amdgcn_s_barrier_signal_isfirst_var",
+    "llvm.amdgcn.s.barrier.signal.var" => "__builtin_amdgcn_s_barrier_signal_var",
+    "llvm.amdgcn.s.barrier.wait" => "__builtin_amdgcn_s_barrier_wait",
     "llvm.amdgcn.s.dcache.inv" => "__builtin_amdgcn_s_dcache_inv",
     "llvm.amdgcn.s.dcache.inv.vol" => "__builtin_amdgcn_s_dcache_inv_vol",
     "llvm.amdgcn.s.dcache.wb" => "__builtin_amdgcn_s_dcache_wb",
     "llvm.amdgcn.s.dcache.wb.vol" => "__builtin_amdgcn_s_dcache_wb_vol",
     "llvm.amdgcn.s.decperflevel" => "__builtin_amdgcn_s_decperflevel",
+    "llvm.amdgcn.s.get.barrier.state" => "__builtin_amdgcn_s_get_barrier_state",
     "llvm.amdgcn.s.get.waveid.in.workgroup" => "__builtin_amdgcn_s_get_waveid_in_workgroup",
     "llvm.amdgcn.s.getpc" => "__builtin_amdgcn_s_getpc",
     "llvm.amdgcn.s.getreg" => "__builtin_amdgcn_s_getreg",
@@ -176,8 +187,10 @@ match name {
     "llvm.amdgcn.s.setprio" => "__builtin_amdgcn_s_setprio",
     "llvm.amdgcn.s.setreg" => "__builtin_amdgcn_s_setreg",
     "llvm.amdgcn.s.sleep" => "__builtin_amdgcn_s_sleep",
+    "llvm.amdgcn.s.sleep.var" => "__builtin_amdgcn_s_sleep_var",
     "llvm.amdgcn.s.wait.event.export.ready" => "__builtin_amdgcn_s_wait_event_export_ready",
     "llvm.amdgcn.s.waitcnt" => "__builtin_amdgcn_s_waitcnt",
+    "llvm.amdgcn.s.wakeup.barrier" => "__builtin_amdgcn_s_wakeup_barrier",
     "llvm.amdgcn.sad.hi.u8" => "__builtin_amdgcn_sad_hi_u8",
     "llvm.amdgcn.sad.u16" => "__builtin_amdgcn_sad_u16",
     "llvm.amdgcn.sad.u8" => "__builtin_amdgcn_sad_u8",
@@ -314,6 +327,8 @@ match name {
     // bpf
     "llvm.bpf.btf.type.id" => "__builtin_bpf_btf_type_id",
     "llvm.bpf.compare" => "__builtin_bpf_compare",
+    "llvm.bpf.getelementptr.and.load" => "__builtin_bpf_getelementptr_and_load",
+    "llvm.bpf.getelementptr.and.store" => "__builtin_bpf_getelementptr_and_store",
     "llvm.bpf.load.byte" => "__builtin_bpf_load_byte",
     "llvm.bpf.load.half" => "__builtin_bpf_load_half",
     "llvm.bpf.load.word" => "__builtin_bpf_load_word",
@@ -5776,14 +5791,6 @@ match name {
     "llvm.s390.verimf" => "__builtin_s390_verimf",
     "llvm.s390.verimg" => "__builtin_s390_verimg",
     "llvm.s390.verimh" => "__builtin_s390_verimh",
-    "llvm.s390.verllb" => "__builtin_s390_verllb",
-    "llvm.s390.verllf" => "__builtin_s390_verllf",
-    "llvm.s390.verllg" => "__builtin_s390_verllg",
-    "llvm.s390.verllh" => "__builtin_s390_verllh",
-    "llvm.s390.verllvb" => "__builtin_s390_verllvb",
-    "llvm.s390.verllvf" => "__builtin_s390_verllvf",
-    "llvm.s390.verllvg" => "__builtin_s390_verllvg",
-    "llvm.s390.verllvh" => "__builtin_s390_verllvh",
     "llvm.s390.vfaeb" => "__builtin_s390_vfaeb",
     "llvm.s390.vfaef" => "__builtin_s390_vfaef",
     "llvm.s390.vfaeh" => "__builtin_s390_vfaeh",
@@ -5815,7 +5822,7 @@ match name {
     "llvm.s390.vistrh" => "__builtin_s390_vistrh",
     "llvm.s390.vlbb" => "__builtin_s390_vlbb",
     "llvm.s390.vll" => "__builtin_s390_vll",
-    "llvm.s390.vlrl" => "__builtin_s390_vlrl",
+    "llvm.s390.vlrl" => "__builtin_s390_vlrlr",
     "llvm.s390.vmaeb" => "__builtin_s390_vmaeb",
     "llvm.s390.vmaef" => "__builtin_s390_vmaef",
     "llvm.s390.vmaeh" => "__builtin_s390_vmaeh",
@@ -5885,7 +5892,7 @@ match name {
     "llvm.s390.vstrczb" => "__builtin_s390_vstrczb",
     "llvm.s390.vstrczf" => "__builtin_s390_vstrczf",
     "llvm.s390.vstrczh" => "__builtin_s390_vstrczh",
-    "llvm.s390.vstrl" => "__builtin_s390_vstrl",
+    "llvm.s390.vstrl" => "__builtin_s390_vstrlr",
     "llvm.s390.vsumb" => "__builtin_s390_vsumb",
     "llvm.s390.vsumgf" => "__builtin_s390_vsumgf",
     "llvm.s390.vsumgh" => "__builtin_s390_vsumgh",
- "__builtin_ia32_prold512_mask" | "__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask" - | "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask" - | "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" - | "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask" - | "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" - | "__builtin_ia32_prolq512_mask" | "__builtin_ia32_prorq512_mask" | "__builtin_ia32_pslldi512_mask" - | "__builtin_ia32_psrldi512_mask" | "__builtin_ia32_psllqi512_mask" | "__builtin_ia32_psrlqi512_mask" - | "__builtin_ia32_pslld512_mask" | "__builtin_ia32_psrld512_mask" | "__builtin_ia32_psllq512_mask" - | "__builtin_ia32_psrlq512_mask" | "__builtin_ia32_psrad512_mask" | "__builtin_ia32_psraq512_mask" - | "__builtin_ia32_psradi512_mask" | "__builtin_ia32_psraqi512_mask" | "__builtin_ia32_psrav16si_mask" - | "__builtin_ia32_psrav8di_mask" | "__builtin_ia32_prolvd512_mask" | "__builtin_ia32_prorvd512_mask" - | "__builtin_ia32_prolvq512_mask" | "__builtin_ia32_prorvq512_mask" | "__builtin_ia32_psllv16si_mask" - | "__builtin_ia32_psrlv16si_mask" | "__builtin_ia32_psllv8di_mask" | "__builtin_ia32_psrlv8di_mask" - | "__builtin_ia32_permvarsi512_mask" | "__builtin_ia32_vpermilvarps512_mask" - | "__builtin_ia32_vpermilvarpd512_mask" | "__builtin_ia32_permvardi512_mask" - | "__builtin_ia32_permvarsf512_mask" | "__builtin_ia32_permvarqi512_mask" - | "__builtin_ia32_permvarqi256_mask" | "__builtin_ia32_permvarqi128_mask" - | "__builtin_ia32_vpmultishiftqb512_mask" | "__builtin_ia32_vpmultishiftqb256_mask" - | "__builtin_ia32_vpmultishiftqb128_mask" - => { + "__builtin_ia32_prold512_mask" + | "__builtin_ia32_pmuldq512_mask" + | "__builtin_ia32_pmuludq512_mask" + | "__builtin_ia32_pmaxsd512_mask" + | "__builtin_ia32_pmaxsq512_mask" + | "__builtin_ia32_pmaxsq256_mask" + | "__builtin_ia32_pmaxsq128_mask" + | "__builtin_ia32_pmaxud512_mask" + | "__builtin_ia32_pmaxuq512_mask" + | "__builtin_ia32_pminsd512_mask" + | "__builtin_ia32_pminsq512_mask" + | "__builtin_ia32_pminsq256_mask" + | "__builtin_ia32_pminsq128_mask" + | "__builtin_ia32_pminud512_mask" + | "__builtin_ia32_pminuq512_mask" + | "__builtin_ia32_prolq512_mask" + | "__builtin_ia32_prorq512_mask" + | "__builtin_ia32_pslldi512_mask" + | "__builtin_ia32_psrldi512_mask" + | "__builtin_ia32_psllqi512_mask" + | "__builtin_ia32_psrlqi512_mask" + | "__builtin_ia32_pslld512_mask" + | "__builtin_ia32_psrld512_mask" + | "__builtin_ia32_psllq512_mask" + | "__builtin_ia32_psrlq512_mask" + | "__builtin_ia32_psrad512_mask" + | "__builtin_ia32_psraq512_mask" + | "__builtin_ia32_psradi512_mask" + | "__builtin_ia32_psraqi512_mask" + | "__builtin_ia32_psrav16si_mask" + | "__builtin_ia32_psrav8di_mask" + | "__builtin_ia32_prolvd512_mask" + | "__builtin_ia32_prorvd512_mask" + | "__builtin_ia32_prolvq512_mask" + | "__builtin_ia32_prorvq512_mask" + | "__builtin_ia32_psllv16si_mask" + | "__builtin_ia32_psrlv16si_mask" + | "__builtin_ia32_psllv8di_mask" + | "__builtin_ia32_psrlv8di_mask" + | "__builtin_ia32_permvarsi512_mask" + | "__builtin_ia32_vpermilvarps512_mask" + | "__builtin_ia32_vpermilvarpd512_mask" + | "__builtin_ia32_permvardi512_mask" + | "__builtin_ia32_permvarsf512_mask" + | "__builtin_ia32_permvarqi512_mask" + | "__builtin_ia32_permvarqi256_mask" + | "__builtin_ia32_permvarqi128_mask" + | "__builtin_ia32_vpmultishiftqb512_mask" + | 
"__builtin_ia32_vpmultishiftqb256_mask" + | "__builtin_ia32_vpmultishiftqb128_mask" => { let mut new_args = args.to_vec(); let arg3_type = gcc_func.get_param_type(2); - let first_arg = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue(); + let first_arg = builder + .current_func() + .new_local(None, arg3_type, "undefined_for_intrinsic") + .to_rvalue(); new_args.push(first_arg); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask" | "__builtin_ia32_pminuq256_mask" - | "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_prold256_mask" | "__builtin_ia32_prold128_mask" - | "__builtin_ia32_prord512_mask" | "__builtin_ia32_prord256_mask" | "__builtin_ia32_prord128_mask" - | "__builtin_ia32_prolq256_mask" | "__builtin_ia32_prolq128_mask" | "__builtin_ia32_prorq256_mask" - | "__builtin_ia32_prorq128_mask" | "__builtin_ia32_psraq256_mask" | "__builtin_ia32_psraq128_mask" - | "__builtin_ia32_psraqi256_mask" | "__builtin_ia32_psraqi128_mask" | "__builtin_ia32_psravq256_mask" - | "__builtin_ia32_psravq128_mask" | "__builtin_ia32_prolvd256_mask" | "__builtin_ia32_prolvd128_mask" - | "__builtin_ia32_prorvd256_mask" | "__builtin_ia32_prorvd128_mask" | "__builtin_ia32_prolvq256_mask" - | "__builtin_ia32_prolvq128_mask" | "__builtin_ia32_prorvq256_mask" | "__builtin_ia32_prorvq128_mask" - | "__builtin_ia32_permvardi256_mask" | "__builtin_ia32_permvardf512_mask" | "__builtin_ia32_permvardf256_mask" - | "__builtin_ia32_pmulhuw512_mask" | "__builtin_ia32_pmulhw512_mask" | "__builtin_ia32_pmulhrsw512_mask" - | "__builtin_ia32_pmaxuw512_mask" | "__builtin_ia32_pmaxub512_mask" | "__builtin_ia32_pmaxsw512_mask" - | "__builtin_ia32_pmaxsb512_mask" | "__builtin_ia32_pminuw512_mask" | "__builtin_ia32_pminub512_mask" - | "__builtin_ia32_pminsw512_mask" | "__builtin_ia32_pminsb512_mask" - | "__builtin_ia32_pmaddwd512_mask" | "__builtin_ia32_pmaddubsw512_mask" | "__builtin_ia32_packssdw512_mask" - | "__builtin_ia32_packsswb512_mask" | "__builtin_ia32_packusdw512_mask" | "__builtin_ia32_packuswb512_mask" - | "__builtin_ia32_pavgw512_mask" | "__builtin_ia32_pavgb512_mask" | "__builtin_ia32_psllw512_mask" - | "__builtin_ia32_psllwi512_mask" | "__builtin_ia32_psllv32hi_mask" | "__builtin_ia32_psrlw512_mask" - | "__builtin_ia32_psrlwi512_mask" | "__builtin_ia32_psllv16hi_mask" | "__builtin_ia32_psllv8hi_mask" - | "__builtin_ia32_psrlv32hi_mask" | "__builtin_ia32_psraw512_mask" | "__builtin_ia32_psrawi512_mask" - | "__builtin_ia32_psrlv16hi_mask" | "__builtin_ia32_psrlv8hi_mask" | "__builtin_ia32_psrav32hi_mask" - | "__builtin_ia32_permvarhi512_mask" | "__builtin_ia32_pshufb512_mask" | "__builtin_ia32_psrav16hi_mask" - | "__builtin_ia32_psrav8hi_mask" | "__builtin_ia32_permvarhi256_mask" | "__builtin_ia32_permvarhi128_mask" - => { + } + "__builtin_ia32_pmaxuq256_mask" + | "__builtin_ia32_pmaxuq128_mask" + | "__builtin_ia32_pminuq256_mask" + | "__builtin_ia32_pminuq128_mask" + | "__builtin_ia32_prold256_mask" + | "__builtin_ia32_prold128_mask" + | "__builtin_ia32_prord512_mask" + | "__builtin_ia32_prord256_mask" + | "__builtin_ia32_prord128_mask" + | "__builtin_ia32_prolq256_mask" + | "__builtin_ia32_prolq128_mask" + | "__builtin_ia32_prorq256_mask" + | "__builtin_ia32_prorq128_mask" + | "__builtin_ia32_psraq256_mask" + | "__builtin_ia32_psraq128_mask" + | "__builtin_ia32_psraqi256_mask" + | 
"__builtin_ia32_psraqi128_mask" + | "__builtin_ia32_psravq256_mask" + | "__builtin_ia32_psravq128_mask" + | "__builtin_ia32_prolvd256_mask" + | "__builtin_ia32_prolvd128_mask" + | "__builtin_ia32_prorvd256_mask" + | "__builtin_ia32_prorvd128_mask" + | "__builtin_ia32_prolvq256_mask" + | "__builtin_ia32_prolvq128_mask" + | "__builtin_ia32_prorvq256_mask" + | "__builtin_ia32_prorvq128_mask" + | "__builtin_ia32_permvardi256_mask" + | "__builtin_ia32_permvardf512_mask" + | "__builtin_ia32_permvardf256_mask" + | "__builtin_ia32_pmulhuw512_mask" + | "__builtin_ia32_pmulhw512_mask" + | "__builtin_ia32_pmulhrsw512_mask" + | "__builtin_ia32_pmaxuw512_mask" + | "__builtin_ia32_pmaxub512_mask" + | "__builtin_ia32_pmaxsw512_mask" + | "__builtin_ia32_pmaxsb512_mask" + | "__builtin_ia32_pminuw512_mask" + | "__builtin_ia32_pminub512_mask" + | "__builtin_ia32_pminsw512_mask" + | "__builtin_ia32_pminsb512_mask" + | "__builtin_ia32_pmaddwd512_mask" + | "__builtin_ia32_pmaddubsw512_mask" + | "__builtin_ia32_packssdw512_mask" + | "__builtin_ia32_packsswb512_mask" + | "__builtin_ia32_packusdw512_mask" + | "__builtin_ia32_packuswb512_mask" + | "__builtin_ia32_pavgw512_mask" + | "__builtin_ia32_pavgb512_mask" + | "__builtin_ia32_psllw512_mask" + | "__builtin_ia32_psllwi512_mask" + | "__builtin_ia32_psllv32hi_mask" + | "__builtin_ia32_psrlw512_mask" + | "__builtin_ia32_psrlwi512_mask" + | "__builtin_ia32_psllv16hi_mask" + | "__builtin_ia32_psllv8hi_mask" + | "__builtin_ia32_psrlv32hi_mask" + | "__builtin_ia32_psraw512_mask" + | "__builtin_ia32_psrawi512_mask" + | "__builtin_ia32_psrlv16hi_mask" + | "__builtin_ia32_psrlv8hi_mask" + | "__builtin_ia32_psrav32hi_mask" + | "__builtin_ia32_permvarhi512_mask" + | "__builtin_ia32_pshufb512_mask" + | "__builtin_ia32_psrav16hi_mask" + | "__builtin_ia32_psrav8hi_mask" + | "__builtin_ia32_permvarhi256_mask" + | "__builtin_ia32_permvarhi128_mask" => { let mut new_args = args.to_vec(); let arg3_type = gcc_func.get_param_type(2); let vector_type = arg3_type.dyncast_vector().expect("vector type"); let zero = builder.context.new_rvalue_zero(vector_type.get_element_type()); let num_units = vector_type.get_num_units(); - let first_arg = builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units]); + let first_arg = + builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units]); new_args.push(first_arg); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_dbpsadbw512_mask" | "__builtin_ia32_dbpsadbw256_mask" | "__builtin_ia32_dbpsadbw128_mask" => { + } + "__builtin_ia32_dbpsadbw512_mask" + | "__builtin_ia32_dbpsadbw256_mask" + | "__builtin_ia32_dbpsadbw128_mask" => { let mut new_args = args.to_vec(); let arg4_type = gcc_func.get_param_type(3); let vector_type = arg4_type.dyncast_vector().expect("vector type"); let zero = builder.context.new_rvalue_zero(vector_type.get_element_type()); let num_units = vector_type.get_num_units(); - let first_arg = builder.context.new_rvalue_from_vector(None, arg4_type, &vec![zero; num_units]); + let first_arg = + builder.context.new_rvalue_from_vector(None, arg4_type, &vec![zero; num_units]); new_args.push(first_arg); let arg5_type = gcc_func.get_param_type(4); let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_vplzcntd_512_mask" | "__builtin_ia32_vplzcntd_256_mask" | 
"__builtin_ia32_vplzcntd_128_mask" - | "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask" => { + } + "__builtin_ia32_vplzcntd_512_mask" + | "__builtin_ia32_vplzcntd_256_mask" + | "__builtin_ia32_vplzcntd_128_mask" + | "__builtin_ia32_vplzcntq_512_mask" + | "__builtin_ia32_vplzcntq_256_mask" + | "__builtin_ia32_vplzcntq_128_mask" => { let mut new_args = args.to_vec(); // Remove last arg as it doesn't seem to be used in GCC and is always false. new_args.pop(); @@ -98,37 +189,45 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let vector_type = arg2_type.dyncast_vector().expect("vector type"); let zero = builder.context.new_rvalue_zero(vector_type.get_element_type()); let num_units = vector_type.get_num_units(); - let first_arg = builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]); + let first_arg = + builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]); new_args.push(first_arg); let arg3_type = gcc_func.get_param_type(2); let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_vpconflictsi_512_mask" | "__builtin_ia32_vpconflictsi_256_mask" - | "__builtin_ia32_vpconflictsi_128_mask" | "__builtin_ia32_vpconflictdi_512_mask" - | "__builtin_ia32_vpconflictdi_256_mask" | "__builtin_ia32_vpconflictdi_128_mask" => { + } + "__builtin_ia32_vpconflictsi_512_mask" + | "__builtin_ia32_vpconflictsi_256_mask" + | "__builtin_ia32_vpconflictsi_128_mask" + | "__builtin_ia32_vpconflictdi_512_mask" + | "__builtin_ia32_vpconflictdi_256_mask" + | "__builtin_ia32_vpconflictdi_128_mask" => { let mut new_args = args.to_vec(); let arg2_type = gcc_func.get_param_type(1); let vector_type = arg2_type.dyncast_vector().expect("vector type"); let zero = builder.context.new_rvalue_zero(vector_type.get_element_type()); let num_units = vector_type.get_num_units(); - let first_arg = builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]); + let first_arg = + builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]); new_args.push(first_arg); let arg3_type = gcc_func.get_param_type(2); let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask" - | "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask" - | "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => { + } + "__builtin_ia32_pternlogd512_mask" + | "__builtin_ia32_pternlogd256_mask" + | "__builtin_ia32_pternlogd128_mask" + | "__builtin_ia32_pternlogq512_mask" + | "__builtin_ia32_pternlogq256_mask" + | "__builtin_ia32_pternlogq128_mask" => { let mut new_args = args.to_vec(); let arg5_type = gcc_func.get_param_type(4); let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1); new_args.push(minus_one); args = new_args.into(); - }, + } "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => { let mut new_args = args.to_vec(); @@ -154,24 +253,33 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc } args = new_args.into(); - }, - "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask" - | "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask" - | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask" - | "__builtin_ia32_divps512_mask" | 
"__builtin_ia32_divpd512_mask" - | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" - | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" => { + } + "__builtin_ia32_addps512_mask" + | "__builtin_ia32_addpd512_mask" + | "__builtin_ia32_subps512_mask" + | "__builtin_ia32_subpd512_mask" + | "__builtin_ia32_mulps512_mask" + | "__builtin_ia32_mulpd512_mask" + | "__builtin_ia32_divps512_mask" + | "__builtin_ia32_divpd512_mask" + | "__builtin_ia32_maxps512_mask" + | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_minps512_mask" + | "__builtin_ia32_minpd512_mask" => { let mut new_args = args.to_vec(); let last_arg = new_args.pop().expect("last arg"); let arg3_type = gcc_func.get_param_type(2); - let undefined = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue(); + let undefined = builder + .current_func() + .new_local(None, arg3_type, "undefined_for_intrinsic") + .to_rvalue(); new_args.push(undefined); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); new_args.push(minus_one); new_args.push(last_arg); args = new_args.into(); - }, + } "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => { let mut new_args = args.to_vec(); let last_arg = new_args.pop().expect("last arg"); @@ -180,54 +288,72 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc new_args.push(minus_one); new_args.push(last_arg); args = new_args.into(); - }, - "__builtin_ia32_vpermi2vard512_mask" | "__builtin_ia32_vpermi2vard256_mask" - | "__builtin_ia32_vpermi2vard128_mask" | "__builtin_ia32_vpermi2varq512_mask" - | "__builtin_ia32_vpermi2varq256_mask" | "__builtin_ia32_vpermi2varq128_mask" - | "__builtin_ia32_vpermi2varps512_mask" | "__builtin_ia32_vpermi2varps256_mask" - | "__builtin_ia32_vpermi2varps128_mask" | "__builtin_ia32_vpermi2varpd512_mask" - | "__builtin_ia32_vpermi2varpd256_mask" | "__builtin_ia32_vpermi2varpd128_mask" | "__builtin_ia32_vpmadd52huq512_mask" - | "__builtin_ia32_vpmadd52luq512_mask" | "__builtin_ia32_vpmadd52huq256_mask" | "__builtin_ia32_vpmadd52luq256_mask" - | "__builtin_ia32_vpmadd52huq128_mask" - => { + } + "__builtin_ia32_vpermi2vard512_mask" + | "__builtin_ia32_vpermi2vard256_mask" + | "__builtin_ia32_vpermi2vard128_mask" + | "__builtin_ia32_vpermi2varq512_mask" + | "__builtin_ia32_vpermi2varq256_mask" + | "__builtin_ia32_vpermi2varq128_mask" + | "__builtin_ia32_vpermi2varps512_mask" + | "__builtin_ia32_vpermi2varps256_mask" + | "__builtin_ia32_vpermi2varps128_mask" + | "__builtin_ia32_vpermi2varpd512_mask" + | "__builtin_ia32_vpermi2varpd256_mask" + | "__builtin_ia32_vpermi2varpd128_mask" + | "__builtin_ia32_vpmadd52huq512_mask" + | "__builtin_ia32_vpmadd52luq512_mask" + | "__builtin_ia32_vpmadd52huq256_mask" + | "__builtin_ia32_vpmadd52luq256_mask" + | "__builtin_ia32_vpmadd52huq128_mask" => { let mut new_args = args.to_vec(); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); new_args.push(minus_one); args = new_args.into(); - }, - "__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" - | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => { + } + "__builtin_ia32_cvtdq2ps512_mask" + | "__builtin_ia32_cvtudq2ps512_mask" + | "__builtin_ia32_sqrtps512_mask" + | "__builtin_ia32_sqrtpd512_mask" => { let mut new_args = args.to_vec(); let last_arg = new_args.pop().expect("last arg"); let arg2_type = gcc_func.get_param_type(1); 
- let undefined = builder.current_func().new_local(None, arg2_type, "undefined_for_intrinsic").to_rvalue(); + let undefined = builder + .current_func() + .new_local(None, arg2_type, "undefined_for_intrinsic") + .to_rvalue(); new_args.push(undefined); let arg3_type = gcc_func.get_param_type(2); let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1); new_args.push(minus_one); new_args.push(last_arg); args = new_args.into(); - }, + } "__builtin_ia32_stmxcsr" => { args = vec![].into(); - }, - "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => { + } + "__builtin_ia32_addcarryx_u64" + | "__builtin_ia32_sbb_u64" + | "__builtin_ia32_addcarryx_u32" + | "__builtin_ia32_sbb_u32" => { let mut new_args = args.to_vec(); let arg2_type = gcc_func.get_param_type(1); let variable = builder.current_func().new_local(None, arg2_type, "addcarryResult"); new_args.push(variable.get_address(None)); args = new_args.into(); - }, - "__builtin_ia32_vpermt2varqi512_mask" | "__builtin_ia32_vpermt2varqi256_mask" - | "__builtin_ia32_vpermt2varqi128_mask" | "__builtin_ia32_vpermt2varhi512_mask" - | "__builtin_ia32_vpermt2varhi256_mask" | "__builtin_ia32_vpermt2varhi128_mask" - => { + } + "__builtin_ia32_vpermt2varqi512_mask" + | "__builtin_ia32_vpermt2varqi256_mask" + | "__builtin_ia32_vpermt2varqi128_mask" + | "__builtin_ia32_vpermt2varhi512_mask" + | "__builtin_ia32_vpermt2varhi256_mask" + | "__builtin_ia32_vpermt2varhi128_mask" => { let new_args = args.to_vec(); let arg4_type = gcc_func.get_param_type(3); let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); args = vec![new_args[1], new_args[0], new_args[2], minus_one].into(); - }, + } "__builtin_ia32_xrstor" | "__builtin_ia32_xsavec" => { let new_args = args.to_vec(); let thirty_two = builder.context.new_rvalue_from_int(new_args[1].get_type(), 32); @@ -235,22 +361,25 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let arg2_type = gcc_func.get_param_type(1); let arg2 = builder.context.new_cast(None, arg2, arg2_type); args = vec![new_args[0], arg2].into(); - }, + } // These builtins are sent one more argument than needed. "__builtin_prefetch" => { let mut new_args = args.to_vec(); new_args.pop(); args = new_args.into(); - }, + } // The GCC version returns one value of the tuple through a pointer. "__builtin_ia32_rdrand64_step" => { - let arg = builder.current_func().new_local(None, builder.ulonglong_type, "return_rdrand_arg"); + let arg = builder.current_func().new_local( + None, + builder.ulonglong_type, + "return_rdrand_arg", + ); args = vec![arg.get_address(None)].into(); - }, + } _ => (), } - } - else { + } else { match &*func_name { "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => { let new_args = args.to_vec(); @@ -259,10 +388,10 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let arg4_type = gcc_func.get_param_type(3); let arg4 = builder.context.new_bitcast(None, new_args[2], arg4_type); args = vec![new_args[0], new_args[1], arg3, arg4, new_args[3], new_args[5]].into(); - }, + } // NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors. // FIXME: the intrinsics like _mm_mask_fmadd_sd should probably directly call the GCC - // instrinsic to avoid this. + // intrinsic to avoid this. 
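Editor's note: the NOTE just above describes a shape mismatch that the next arms fix by broadcasting each scalar into a vector, calling the vector builtin, and keeping only lane 0. A plain-Rust sketch of the same idea, with arrays standing in for gccjit vector values:

```rust
// Illustrative only: scalar FMA routed through a lane-wise "vector" op.
fn fma_scalar_via_vector(a: f32, b: f32, c: f32) -> f32 {
    // Broadcast each scalar to a 4-lane vector, as the codegen does.
    let (va, vb, vc) = ([a; 4], [b; 4], [c; 4]);
    // The vector builtin computes a lane-wise fused multiply-add.
    let mut result = [0.0f32; 4];
    for i in 0..4 {
        result[i] = va[i].mul_add(vb[i], vc[i]);
    }
    // Only lane 0 is meaningful for the scalar form.
    result[0]
}

fn main() {
    assert_eq!(fma_scalar_via_vector(2.0, 3.0, 4.0), 10.0);
}
```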
"__builtin_ia32_vfmaddss3_round" => { let new_args = args.to_vec(); let arg1_type = gcc_func.get_param_type(0); @@ -272,7 +401,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 4]); let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 4]); args = vec![a, b, c, new_args[3]].into(); - }, + } "__builtin_ia32_vfmaddsd3_round" => { let new_args = args.to_vec(); let arg1_type = gcc_func.get_param_type(0); @@ -282,25 +411,34 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 2]); let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 2]); args = vec![a, b, c, new_args[3]].into(); - }, - "__builtin_ia32_vfmaddsubpd256" | "__builtin_ia32_vfmaddsubps" | "__builtin_ia32_vfmaddsubps256" - | "__builtin_ia32_vfmaddsubpd" => { + } + "__builtin_ia32_vfmaddsubpd256" + | "__builtin_ia32_vfmaddsubps" + | "__builtin_ia32_vfmaddsubps256" + | "__builtin_ia32_vfmaddsubpd" => { if let Some(original_function_name) = original_function_name { match &**original_function_name { - "llvm.x86.fma.vfmsubadd.pd.256" | "llvm.x86.fma.vfmsubadd.ps" | "llvm.x86.fma.vfmsubadd.ps.256" - | "llvm.x86.fma.vfmsubadd.pd" => { + "llvm.x86.fma.vfmsubadd.pd.256" + | "llvm.x86.fma.vfmsubadd.ps" + | "llvm.x86.fma.vfmsubadd.ps.256" + | "llvm.x86.fma.vfmsubadd.pd" => { // NOTE: since both llvm.x86.fma.vfmsubadd.ps and llvm.x86.fma.vfmaddsub.ps maps to // __builtin_ia32_vfmaddsubps, only add minus if this comes from a // subadd LLVM intrinsic, e.g. _mm256_fmsubadd_pd. let mut new_args = args.to_vec(); let arg3 = &mut new_args[2]; - *arg3 = builder.context.new_unary_op(None, UnaryOp::Minus, arg3.get_type(), *arg3); + *arg3 = builder.context.new_unary_op( + None, + UnaryOp::Minus, + arg3.get_type(), + *arg3, + ); args = new_args.into(); - }, + } _ => (), } } - }, + } "__builtin_ia32_ldmxcsr" => { // The builtin __builtin_ia32_ldmxcsr takes an integer value while llvm.x86.sse.ldmxcsr takes a pointer, // so dereference the pointer. 
@@ -309,23 +447,31 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let arg1 = builder.context.new_cast(None, args[0], uint_ptr_type); new_args[0] = arg1.dereference(None).to_rvalue(); args = new_args.into(); - }, - "__builtin_ia32_rcp14sd_mask" | "__builtin_ia32_rcp14ss_mask" | "__builtin_ia32_rsqrt14sd_mask" - | "__builtin_ia32_rsqrt14ss_mask" => { + } + "__builtin_ia32_rcp14sd_mask" + | "__builtin_ia32_rcp14ss_mask" + | "__builtin_ia32_rsqrt14sd_mask" + | "__builtin_ia32_rsqrt14ss_mask" => { let new_args = args.to_vec(); args = vec![new_args[1], new_args[0], new_args[2], new_args[3]].into(); - }, + } "__builtin_ia32_sqrtsd_mask_round" | "__builtin_ia32_sqrtss_mask_round" => { let new_args = args.to_vec(); args = vec![new_args[1], new_args[0], new_args[2], new_args[3], new_args[4]].into(); - }, - "__builtin_ia32_vpshrdv_v8di" | "__builtin_ia32_vpshrdv_v4di" | "__builtin_ia32_vpshrdv_v2di" | - "__builtin_ia32_vpshrdv_v16si" | "__builtin_ia32_vpshrdv_v8si" | "__builtin_ia32_vpshrdv_v4si" | - "__builtin_ia32_vpshrdv_v32hi" | "__builtin_ia32_vpshrdv_v16hi" | "__builtin_ia32_vpshrdv_v8hi" => { + } + "__builtin_ia32_vpshrdv_v8di" + | "__builtin_ia32_vpshrdv_v4di" + | "__builtin_ia32_vpshrdv_v2di" + | "__builtin_ia32_vpshrdv_v16si" + | "__builtin_ia32_vpshrdv_v8si" + | "__builtin_ia32_vpshrdv_v4si" + | "__builtin_ia32_vpshrdv_v32hi" + | "__builtin_ia32_vpshrdv_v16hi" + | "__builtin_ia32_vpshrdv_v8hi" => { // The first two arguments are reversed, compared to LLVM. let new_args = args.to_vec(); args = vec![new_args[1], new_args[0], new_args[2]].into(); - }, + } _ => (), } } @@ -333,16 +479,27 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc args } -pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, mut return_value: RValue<'gcc>, func_name: &str, args: &[RValue<'gcc>], args_adjusted: bool, orig_args: &[RValue<'gcc>]) -> RValue<'gcc> { +pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>( + builder: &Builder<'a, 'gcc, 'tcx>, + mut return_value: RValue<'gcc>, + func_name: &str, + args: &[RValue<'gcc>], + args_adjusted: bool, + orig_args: &[RValue<'gcc>], +) -> RValue<'gcc> { match func_name { "__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => { - #[cfg(feature="master")] + #[cfg(feature = "master")] { let zero = builder.context.new_rvalue_zero(builder.int_type); - return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue(); + return_value = + builder.context.new_vector_access(None, return_value, zero).to_rvalue(); } - }, - "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => { + } + "__builtin_ia32_addcarryx_u64" + | "__builtin_ia32_sbb_u64" + | "__builtin_ia32_addcarryx_u32" + | "__builtin_ia32_sbb_u32" => { // Both llvm.x86.addcarry.32 and llvm.x86.addcarryx.u32 points to the same GCC builtin, // but only the former requires adjusting the return value. 
// Those 2 LLVM intrinsics differ by their argument count, that's why we check if the @@ -351,10 +508,16 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, let last_arg = args.last().expect("last arg"); let field1 = builder.context.new_field(None, builder.u8_type, "carryFlag"); let field2 = builder.context.new_field(None, args[1].get_type(), "carryResult"); - let struct_type = builder.context.new_struct_type(None, "addcarryResult", &[field1, field2]); - return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[return_value, last_arg.dereference(None).to_rvalue()]); + let struct_type = + builder.context.new_struct_type(None, "addcarryResult", &[field1, field2]); + return_value = builder.context.new_struct_constructor( + None, + struct_type.as_type(), + None, + &[return_value, last_arg.dereference(None).to_rvalue()], + ); } - }, + } "__builtin_ia32_stmxcsr" => { // The builtin __builtin_ia32_stmxcsr returns a value while llvm.x86.sse.stmxcsr writes // the result in its pointer argument. @@ -366,20 +529,24 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, // The return value was assigned to the result pointer above. In order to not call the // builtin twice, we overwrite the return value with a dummy value. return_value = builder.context.new_rvalue_zero(builder.int_type); - }, + } "__builtin_ia32_rdrand64_step" => { let random_number = args[0].dereference(None).to_rvalue(); - let success_variable = builder.current_func().new_local(None, return_value.get_type(), "success"); + let success_variable = + builder.current_func().new_local(None, return_value.get_type(), "success"); builder.llbb().add_assignment(None, success_variable, return_value); let field1 = builder.context.new_field(None, random_number.get_type(), "random_number"); let field2 = builder.context.new_field(None, return_value.get_type(), "success"); - let struct_type = builder.context.new_struct_type(None, "rdrand_result", &[field1, field2]); - return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[ - random_number, - success_variable.to_rvalue(), - ]); - }, + let struct_type = + builder.context.new_struct_type(None, "rdrand_result", &[field1, field2]); + return_value = builder.context.new_struct_constructor( + None, + struct_type.as_type(), + None, + &[random_number, success_variable.to_rvalue()], + ); + } _ => (), } @@ -391,23 +558,33 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool { match func_name { // NOTE: these intrinsics have missing parameters before the last one, so ignore the // last argument type check. 
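Editor's note: the add-with-carry arm above repacks the GCC builtin's two outputs (carry returned, sum written through the "addcarryResult" out-pointer pushed earlier) into the (carry, sum) pair that LLVM's intrinsic is expected to produce. A plain-Rust analogue of the repacking, illustrative only:

```rust
// What the GCC builtin computes vs. what the LLVM intrinsic returns:
// carry flag plus out-pointer on one side, a (carry, sum) pair on the other.
fn addcarry_u64(carry_in: u8, a: u64, b: u64) -> (u8, u64) {
    let (sum, c1) = a.overflowing_add(b);
    let (sum, c2) = sum.overflowing_add(carry_in as u64);
    // adjust_intrinsic_return_value bundles (carryFlag, carryResult) the same way.
    ((c1 | c2) as u8, sum)
}

fn main() {
    assert_eq!(addcarry_u64(1, u64::MAX, 0), (1, 0)); // wraps, carry out set
    assert_eq!(addcarry_u64(0, 2, 3), (0, 5));
}
```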
- "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask" - | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask" - | "__builtin_ia32_sqrtpd512_mask" | "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask" - | "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask" - | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask" - | "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask" - | "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" - | "__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" => { - if index == args_len - 1 { - return true; - } - }, + "__builtin_ia32_maxps512_mask" + | "__builtin_ia32_maxpd512_mask" + | "__builtin_ia32_minps512_mask" + | "__builtin_ia32_minpd512_mask" + | "__builtin_ia32_sqrtps512_mask" + | "__builtin_ia32_sqrtpd512_mask" + | "__builtin_ia32_addps512_mask" + | "__builtin_ia32_addpd512_mask" + | "__builtin_ia32_subps512_mask" + | "__builtin_ia32_subpd512_mask" + | "__builtin_ia32_mulps512_mask" + | "__builtin_ia32_mulpd512_mask" + | "__builtin_ia32_divps512_mask" + | "__builtin_ia32_divpd512_mask" + | "__builtin_ia32_vfmaddsubps512_mask" + | "__builtin_ia32_vfmaddsubpd512_mask" + | "__builtin_ia32_cvtdq2ps512_mask" + | "__builtin_ia32_cvtudq2ps512_mask" => { + if index == args_len - 1 { + return true; + } + } "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => { if index == 2 || index == 3 { return true; } - }, + } "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => { // Since there are two LLVM intrinsics that map to each of these GCC builtins and only // one of them has a missing parameter before the last one, we check the number of @@ -415,49 +592,50 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool { if args_len == 4 && index == args_len - 1 { return true; } - }, + } // NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors. "__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => return true, - "__builtin_ia32_vplzcntd_512_mask" | "__builtin_ia32_vplzcntd_256_mask" | "__builtin_ia32_vplzcntd_128_mask" - | "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask" => { + "__builtin_ia32_vplzcntd_512_mask" + | "__builtin_ia32_vplzcntd_256_mask" + | "__builtin_ia32_vplzcntd_128_mask" + | "__builtin_ia32_vplzcntq_512_mask" + | "__builtin_ia32_vplzcntq_256_mask" + | "__builtin_ia32_vplzcntq_128_mask" => { if index == args_len - 1 { return true; } - }, + } _ => (), } false } -#[cfg(not(feature="master"))] +#[cfg(not(feature = "master"))] pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> { - let gcc_name = - match name { - "llvm.x86.sse2.pause" => { - // NOTE: pause is only a hint, so we use a dummy built-in because target built-ins - // are not supported in libgccjit 12. - "__builtin_inff" - }, - "llvm.x86.xgetbv" => { - "__builtin_trap" - }, - _ => unimplemented!("unsupported LLVM intrinsic {}", name), - }; + let gcc_name = match name { + "llvm.x86.sse2.pause" => { + // NOTE: pause is only a hint, so we use a dummy built-in because target built-ins + // are not supported in libgccjit 12. 
+ "__builtin_inff" + } + "llvm.x86.xgetbv" => "__builtin_trap", + _ => unimplemented!("unsupported LLVM intrinsic {}", name), + }; let func = cx.context.get_builtin_function(gcc_name); cx.functions.borrow_mut().insert(gcc_name.to_string(), func); return func; } -#[cfg(feature="master")] +#[cfg(feature = "master")] pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> { match name { "llvm.prefetch" => { let gcc_name = "__builtin_prefetch"; let func = cx.context.get_builtin_function(gcc_name); cx.functions.borrow_mut().insert(gcc_name.to_string(), func); - return func - }, + return func; + } _ => (), } diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs index d43f5d74757..a6c8b72e851 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs @@ -1,43 +1,48 @@ pub mod llvm; mod simd; -#[cfg(feature="master")] +#[cfg(feature = "master")] use std::iter; -#[cfg(feature="master")] +#[cfg(feature = "master")] use gccjit::FunctionType; use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp}; -use rustc_codegen_ssa::MemFlags; use rustc_codegen_ssa::base::wants_msvc_seh; use rustc_codegen_ssa::common::IntPredicate; +use rustc_codegen_ssa::errors::InvalidMonomorphization; use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue}; use rustc_codegen_ssa::mir::place::PlaceRef; -use rustc_codegen_ssa::traits::{ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods}; -#[cfg(feature="master")] +use rustc_codegen_ssa::traits::{ + ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods, +}; +#[cfg(feature = "master")] use rustc_codegen_ssa::traits::{BaseTypeMethods, MiscMethods}; -use rustc_codegen_ssa::errors::InvalidMonomorphization; +use rustc_codegen_ssa::MemFlags; use rustc_middle::bug; -use rustc_middle::ty::{self, Instance, Ty}; use rustc_middle::ty::layout::LayoutOf; -#[cfg(feature="master")] +#[cfg(feature = "master")] use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt}; -use rustc_span::{Span, Symbol, sym}; -use rustc_target::abi::HasDataLayout; +use rustc_middle::ty::{self, Instance, Ty}; +use rustc_span::{sym, Span, Symbol}; use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode}; -use rustc_target::spec::PanicStrategy; -#[cfg(feature="master")] +use rustc_target::abi::HasDataLayout; +#[cfg(feature = "master")] use rustc_target::spec::abi::Abi; +use rustc_target::spec::PanicStrategy; -use crate::abi::GccType; -#[cfg(feature="master")] +#[cfg(feature = "master")] use crate::abi::FnAbiGccExt; +use crate::abi::GccType; use crate::builder::Builder; use crate::common::{SignType, TypeReflection}; use crate::context::CodegenCx; -use crate::type_of::LayoutGccExt; use crate::intrinsic::simd::generic_simd_intrinsic; +use crate::type_of::LayoutGccExt; -fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) -> Option<Function<'gcc>> { +fn get_simple_intrinsic<'gcc, 'tcx>( + cx: &CodegenCx<'gcc, 'tcx>, + name: Symbol, +) -> Option<Function<'gcc>> { let gcc_name = match name { sym::sqrtf32 => "sqrtf", sym::sqrtf64 => "sqrt", @@ -90,7 +95,14 @@ fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) -> } impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { - fn codegen_intrinsic_call(&mut self, instance: Instance<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, args: &[OperandRef<'tcx, RValue<'gcc>>], llresult: RValue<'gcc>, span: Span) -> Result<(), Instance<'tcx>> { + 
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index d43f5d74757..a6c8b72e851 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -1,43 +1,48 @@
 pub mod llvm;
 mod simd;
 
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use std::iter;
 
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use gccjit::FunctionType;
 use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp};
-use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::base::wants_msvc_seh;
 use rustc_codegen_ssa::common::IntPredicate;
+use rustc_codegen_ssa::errors::InvalidMonomorphization;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::PlaceRef;
-use rustc_codegen_ssa::traits::{ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
-#[cfg(feature="master")]
+use rustc_codegen_ssa::traits::{
+    ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods,
+};
+#[cfg(feature = "master")]
 use rustc_codegen_ssa::traits::{BaseTypeMethods, MiscMethods};
-use rustc_codegen_ssa::errors::InvalidMonomorphization;
+use rustc_codegen_ssa::MemFlags;
 use rustc_middle::bug;
-use rustc_middle::ty::{self, Instance, Ty};
 use rustc_middle::ty::layout::LayoutOf;
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt};
-use rustc_span::{Span, Symbol, sym};
-use rustc_target::abi::HasDataLayout;
+use rustc_middle::ty::{self, Instance, Ty};
+use rustc_span::{sym, Span, Symbol};
 use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
-use rustc_target::spec::PanicStrategy;
-#[cfg(feature="master")]
+use rustc_target::abi::HasDataLayout;
+#[cfg(feature = "master")]
 use rustc_target::spec::abi::Abi;
+use rustc_target::spec::PanicStrategy;
 
-use crate::abi::GccType;
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use crate::abi::FnAbiGccExt;
+use crate::abi::GccType;
 use crate::builder::Builder;
 use crate::common::{SignType, TypeReflection};
 use crate::context::CodegenCx;
-use crate::type_of::LayoutGccExt;
 use crate::intrinsic::simd::generic_simd_intrinsic;
+use crate::type_of::LayoutGccExt;
 
-fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) -> Option<Function<'gcc>> {
+fn get_simple_intrinsic<'gcc, 'tcx>(
+    cx: &CodegenCx<'gcc, 'tcx>,
+    name: Symbol,
+) -> Option<Function<'gcc>> {
     let gcc_name = match name {
         sym::sqrtf32 => "sqrtf",
         sym::sqrtf64 => "sqrt",
@@ -90,7 +95,14 @@ fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) ->
 }
 
 impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
-    fn codegen_intrinsic_call(&mut self, instance: Instance<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, args: &[OperandRef<'tcx, RValue<'gcc>>], llresult: RValue<'gcc>, span: Span) -> Result<(), Instance<'tcx>> {
+    fn codegen_intrinsic_call(
+        &mut self,
+        instance: Instance<'tcx>,
+        fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
+        args: &[OperandRef<'tcx, RValue<'gcc>>],
+        llresult: RValue<'gcc>,
+        span: Span,
+    ) -> Result<(), Instance<'tcx>> {
         let tcx = self.tcx;
         let callee_ty = instance.ty(tcx, ty::ParamEnv::reveal_all());
@@ -110,268 +122,274 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
         let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
 
         let simple = get_simple_intrinsic(self, name);
-        let llval =
-            match name {
-                _ if simple.is_some() => {
-                    // FIXME(antoyo): remove this cast when the API supports function.
-                    let func = unsafe { std::mem::transmute(simple.expect("simple")) };
-                    self.call(self.type_void(), None, None, func, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None)
-                },
-                sym::likely => {
-                    self.expect(args[0].immediate(), true)
-                }
-                sym::unlikely => {
-                    self.expect(args[0].immediate(), false)
-                }
-                sym::is_val_statically_known => {
-                    let a = args[0].immediate();
-                    let builtin = self.context.get_builtin_function("__builtin_constant_p");
-                    let res = self.context.new_call(None, builtin, &[a]);
-                    self.icmp(IntPredicate::IntEQ, res, self.const_i32(0))
-                }
-                sym::catch_unwind => {
-                    try_intrinsic(
-                        self,
-                        args[0].immediate(),
-                        args[1].immediate(),
-                        args[2].immediate(),
-                        llresult,
-                    );
-                    return Ok(());
-                }
-                sym::breakpoint => {
-                    unimplemented!();
-                }
-                sym::va_copy => {
-                    unimplemented!();
-                }
-                sym::va_arg => {
-                    unimplemented!();
-                }
-                sym::volatile_load | sym::unaligned_volatile_load => {
-                    let tp_ty = fn_args.type_at(0);
-                    let ptr = args[0].immediate();
-                    let load =
-                        if let PassMode::Cast { cast: ty, pad_i32: _ } = &fn_abi.ret.mode {
-                            let gcc_ty = ty.gcc_type(self);
-                            self.volatile_load(gcc_ty, ptr)
-                        }
-                        else {
-                            self.volatile_load(self.layout_of(tp_ty).gcc_type(self), ptr)
-                        };
-                    // TODO(antoyo): set alignment.
-                    self.to_immediate(load, self.layout_of(tp_ty))
-                }
-                sym::volatile_store => {
-                    let dst = args[0].deref(self.cx());
-                    args[1].val.volatile_store(self, dst);
-                    return Ok(());
-                }
-                sym::unaligned_volatile_store => {
-                    let dst = args[0].deref(self.cx());
-                    args[1].val.unaligned_volatile_store(self, dst);
-                    return Ok(());
-                }
-                sym::prefetch_read_data
-                | sym::prefetch_write_data
-                | sym::prefetch_read_instruction
-                | sym::prefetch_write_instruction => {
-                    unimplemented!();
-                }
-                sym::ctlz
-                | sym::ctlz_nonzero
-                | sym::cttz
-                | sym::cttz_nonzero
-                | sym::ctpop
-                | sym::bswap
-                | sym::bitreverse
-                | sym::rotate_left
-                | sym::rotate_right
-                | sym::saturating_add
-                | sym::saturating_sub => {
-                    let ty = arg_tys[0];
-                    match int_type_width_signed(ty, self) {
-                        Some((width, signed)) => match name {
-                            sym::ctlz | sym::cttz => {
-                                let func = self.current_func.borrow().expect("func");
-                                let then_block = func.new_block("then");
-                                let else_block = func.new_block("else");
-                                let after_block = func.new_block("after");
-
-                                let arg = args[0].immediate();
-                                let result = func.new_local(None, arg.get_type(), "zeros");
-                                let zero = self.cx.gcc_zero(arg.get_type());
-                                let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
-                                self.llbb().end_with_conditional(None, cond, then_block, else_block);
-
-                                let zero_result = self.cx.gcc_uint(arg.get_type(), width);
-                                then_block.add_assignment(None, result, zero_result);
-                                then_block.end_with_jump(None, after_block);
-
-                                // NOTE: since jumps were added in a place
-                                // count_leading_zeroes() does not expect, the current block
-                                // in the state need to be updated.
-                                self.switch_to_block(else_block);
-
-                                let zeros =
-                                    match name {
-                                        sym::ctlz => self.count_leading_zeroes(width, arg),
-                                        sym::cttz => self.count_trailing_zeroes(width, arg),
-                                        _ => unreachable!(),
-                                    };
-                                self.llbb().add_assignment(None, result, zeros);
-                                self.llbb().end_with_jump(None, after_block);
-
-                                // NOTE: since jumps were added in a place rustc does not
-                                // expect, the current block in the state need to be updated.
-                                self.switch_to_block(after_block);
-
-                                result.to_rvalue()
-                            }
-                            sym::ctlz_nonzero => {
-                                self.count_leading_zeroes(width, args[0].immediate())
-                            },
-                            sym::cttz_nonzero => {
-                                self.count_trailing_zeroes(width, args[0].immediate())
-                            }
-                            sym::ctpop => self.pop_count(args[0].immediate()),
-                            sym::bswap => {
-                                if width == 8 {
-                                    args[0].immediate() // byte swap a u8/i8 is just a no-op
-                                }
-                                else {
-                                    self.gcc_bswap(args[0].immediate(), width)
-                                }
-                            },
-                            sym::bitreverse => self.bit_reverse(width, args[0].immediate()),
-                            sym::rotate_left | sym::rotate_right => {
-                                // TODO(antoyo): implement using algorithm from:
-                                // https://blog.regehr.org/archives/1063
-                                // for other platforms.
-                                let is_left = name == sym::rotate_left;
-                                let val = args[0].immediate();
-                                let raw_shift = args[1].immediate();
-                                if is_left {
-                                    self.rotate_left(val, raw_shift, width)
-                                }
-                                else {
-                                    self.rotate_right(val, raw_shift, width)
-                                }
-                            },
-                            sym::saturating_add => {
-                                self.saturating_add(args[0].immediate(), args[1].immediate(), signed, width)
-                            },
-                            sym::saturating_sub => {
-                                self.saturating_sub(args[0].immediate(), args[1].immediate(), signed, width)
-                            },
-                            _ => bug!(),
-                        },
-                        None => {
-                            tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType { span, name, ty });
-                            return Ok(());
-                        }
-                    }
-                }
-
-                sym::raw_eq => {
-                    use rustc_target::abi::Abi::*;
-                    let tp_ty = fn_args.type_at(0);
-                    let layout = self.layout_of(tp_ty).layout;
-                    let _use_integer_compare = match layout.abi() {
-                        Scalar(_) | ScalarPair(_, _) => true,
-                        Uninhabited | Vector { .. } => false,
-                        Aggregate { .. } => {
-                            // For rusty ABIs, small aggregates are actually passed
-                            // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
-                            // so we re-use that same threshold here.
-                            layout.size() <= self.data_layout().pointer_size * 2
-                        }
-                    };
-
-                    let a = args[0].immediate();
-                    let b = args[1].immediate();
-                    if layout.size().bytes() == 0 {
-                        self.const_bool(true)
-                    }
-                    /*else if use_integer_compare {
-                        let integer_ty = self.type_ix(layout.size.bits()); // FIXME(antoyo): LLVM creates an integer of 96 bits for [i32; 3], but gcc doesn't support this, so it creates an integer of 128 bits.
-                        let ptr_ty = self.type_ptr_to(integer_ty);
-                        let a_ptr = self.bitcast(a, ptr_ty);
-                        let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
-                        let b_ptr = self.bitcast(b, ptr_ty);
-                        let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
-                        self.icmp(IntPredicate::IntEQ, a_val, b_val)
-                    }*/
-                    else {
-                        let void_ptr_type = self.context.new_type::<*const ()>();
-                        let a_ptr = self.bitcast(a, void_ptr_type);
-                        let b_ptr = self.bitcast(b, void_ptr_type);
-                        let n = self.context.new_cast(None, self.const_usize(layout.size().bytes()), self.sizet_type);
-                        let builtin = self.context.get_builtin_function("memcmp");
-                        let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
-                        self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
-                    }
-                }
-
-                sym::compare_bytes => {
-                    let a = args[0].immediate();
-                    let b = args[1].immediate();
-                    let n = args[2].immediate();
-
-                    let void_ptr_type = self.context.new_type::<*const ()>();
-                    let a_ptr = self.bitcast(a, void_ptr_type);
-                    let b_ptr = self.bitcast(b, void_ptr_type);
-
-                    // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
-                    let builtin = self.context.get_builtin_function("memcmp");
-                    let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
-                    self.sext(cmp, self.type_ix(32))
-                }
-
-                sym::black_box => {
-                    args[0].val.store(self, result);
-
-                    let block = self.llbb();
-                    let extended_asm = block.add_extended_asm(None, "");
-                    extended_asm.add_input_operand(None, "r", result.llval);
-                    extended_asm.add_clobber("memory");
-                    extended_asm.set_volatile_flag(true);
-
-                    // We have copied the value to `result` already.
-                    return Ok(());
-                }
-
-                sym::ptr_mask => {
-                    let usize_type = self.context.new_type::<usize>();
-                    let void_ptr_type = self.context.new_type::<*const ()>();
-
-                    let ptr = args[0].immediate();
-                    let mask = args[1].immediate();
-
-                    let addr = self.bitcast(ptr, usize_type);
-                    let masked = self.and(addr, mask);
-                    self.bitcast(masked, void_ptr_type)
-                },
-
-                _ if name_str.starts_with("simd_") => {
-                    match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
-                        Ok(llval) => llval,
-                        Err(()) => return Ok(()),
-                    }
-                }
-
-                // Fall back to default body
-                _ => return Err(Instance::new(instance.def_id(), instance.args)),
-            };
+        let llval = match name {
+            _ if simple.is_some() => {
+                // FIXME(antoyo): remove this cast when the API supports function.
+                let func = unsafe { std::mem::transmute(simple.expect("simple")) };
+                self.call(
+                    self.type_void(),
+                    None,
+                    None,
+                    func,
+                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
+                    None,
+                )
+            }
+            sym::likely => self.expect(args[0].immediate(), true),
+            sym::unlikely => self.expect(args[0].immediate(), false),
+            sym::is_val_statically_known => {
+                let a = args[0].immediate();
+                let builtin = self.context.get_builtin_function("__builtin_constant_p");
+                let res = self.context.new_call(None, builtin, &[a]);
+                self.icmp(IntPredicate::IntEQ, res, self.const_i32(0))
+            }
+            sym::catch_unwind => {
+                try_intrinsic(
+                    self,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    args[2].immediate(),
+                    llresult,
+                );
+                return Ok(());
+            }
+            sym::breakpoint => {
+                unimplemented!();
+            }
+            sym::va_copy => {
+                unimplemented!();
+            }
+            sym::va_arg => {
+                unimplemented!();
+            }
+
+            sym::volatile_load | sym::unaligned_volatile_load => {
+                let tp_ty = fn_args.type_at(0);
+                let ptr = args[0].immediate();
+                let load = if let PassMode::Cast { cast: ty, pad_i32: _ } = &fn_abi.ret.mode {
+                    let gcc_ty = ty.gcc_type(self);
+                    self.volatile_load(gcc_ty, ptr)
+                } else {
+                    self.volatile_load(self.layout_of(tp_ty).gcc_type(self), ptr)
+                };
+                // TODO(antoyo): set alignment.
+                self.to_immediate(load, self.layout_of(tp_ty))
+            }
+            sym::volatile_store => {
+                let dst = args[0].deref(self.cx());
+                args[1].val.volatile_store(self, dst);
+                return Ok(());
+            }
+            sym::unaligned_volatile_store => {
+                let dst = args[0].deref(self.cx());
+                args[1].val.unaligned_volatile_store(self, dst);
+                return Ok(());
+            }
+            sym::prefetch_read_data
+            | sym::prefetch_write_data
+            | sym::prefetch_read_instruction
+            | sym::prefetch_write_instruction => {
+                unimplemented!();
+            }
+            sym::ctlz
+            | sym::ctlz_nonzero
+            | sym::cttz
+            | sym::cttz_nonzero
+            | sym::ctpop
+            | sym::bswap
+            | sym::bitreverse
+            | sym::rotate_left
+            | sym::rotate_right
+            | sym::saturating_add
+            | sym::saturating_sub => {
+                let ty = arg_tys[0];
+                match int_type_width_signed(ty, self) {
+                    Some((width, signed)) => match name {
+                        sym::ctlz | sym::cttz => {
+                            let func = self.current_func.borrow().expect("func");
+                            let then_block = func.new_block("then");
+                            let else_block = func.new_block("else");
+                            let after_block = func.new_block("after");
+
+                            let arg = args[0].immediate();
+                            let result = func.new_local(None, arg.get_type(), "zeros");
+                            let zero = self.cx.gcc_zero(arg.get_type());
+                            let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
+                            self.llbb().end_with_conditional(None, cond, then_block, else_block);
+
+                            let zero_result = self.cx.gcc_uint(arg.get_type(), width);
+                            then_block.add_assignment(None, result, zero_result);
+                            then_block.end_with_jump(None, after_block);
+
+                            // NOTE: since jumps were added in a place
+                            // count_leading_zeroes() does not expect, the current block
+                            // in the state need to be updated.
+                            self.switch_to_block(else_block);
+
+                            let zeros = match name {
+                                sym::ctlz => self.count_leading_zeroes(width, arg),
+                                sym::cttz => self.count_trailing_zeroes(width, arg),
+                                _ => unreachable!(),
+                            };
+                            self.llbb().add_assignment(None, result, zeros);
+                            self.llbb().end_with_jump(None, after_block);
+
+                            // NOTE: since jumps were added in a place rustc does not
+                            // expect, the current block in the state need to be updated.
+                            self.switch_to_block(after_block);
+
+                            result.to_rvalue()
+                        }
+                        sym::ctlz_nonzero => self.count_leading_zeroes(width, args[0].immediate()),
+                        sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()),
+                        sym::ctpop => self.pop_count(args[0].immediate()),
+                        sym::bswap => {
+                            if width == 8 {
+                                args[0].immediate() // byte swap a u8/i8 is just a no-op
+                            } else {
+                                self.gcc_bswap(args[0].immediate(), width)
+                            }
+                        }
+                        sym::bitreverse => self.bit_reverse(width, args[0].immediate()),
+                        sym::rotate_left | sym::rotate_right => {
+                            // TODO(antoyo): implement using algorithm from:
+                            // https://blog.regehr.org/archives/1063
+                            // for other platforms.
+                            let is_left = name == sym::rotate_left;
+                            let val = args[0].immediate();
+                            let raw_shift = args[1].immediate();
+                            if is_left {
+                                self.rotate_left(val, raw_shift, width)
+                            } else {
+                                self.rotate_right(val, raw_shift, width)
+                            }
+                        }
+                        sym::saturating_add => self.saturating_add(
+                            args[0].immediate(),
+                            args[1].immediate(),
+                            signed,
+                            width,
+                        ),
+                        sym::saturating_sub => self.saturating_sub(
+                            args[0].immediate(),
+                            args[1].immediate(),
+                            signed,
+                            width,
+                        ),
+                        _ => bug!(),
+                    },
+                    None => {
+                        tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
+                            span,
+                            name,
+                            ty,
+                        });
+                        return Ok(());
+                    }
+                }
+            }
+
+            sym::raw_eq => {
+                use rustc_target::abi::Abi::*;
+                let tp_ty = fn_args.type_at(0);
+                let layout = self.layout_of(tp_ty).layout;
+                let _use_integer_compare = match layout.abi() {
+                    Scalar(_) | ScalarPair(_, _) => true,
+                    Uninhabited | Vector { .. } => false,
+                    Aggregate { .. } => {
+                        // For rusty ABIs, small aggregates are actually passed
+                        // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
+                        // so we re-use that same threshold here.
+                        layout.size() <= self.data_layout().pointer_size * 2
+                    }
+                };
+
+                let a = args[0].immediate();
+                let b = args[1].immediate();
+                if layout.size().bytes() == 0 {
+                    self.const_bool(true)
+                }
+                /*else if use_integer_compare {
+                    let integer_ty = self.type_ix(layout.size.bits()); // FIXME(antoyo): LLVM creates an integer of 96 bits for [i32; 3], but gcc doesn't support this, so it creates an integer of 128 bits.
+                    let ptr_ty = self.type_ptr_to(integer_ty);
+                    let a_ptr = self.bitcast(a, ptr_ty);
+                    let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
+                    let b_ptr = self.bitcast(b, ptr_ty);
+                    let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
+                    self.icmp(IntPredicate::IntEQ, a_val, b_val)
+                }*/
+                else {
+                    let void_ptr_type = self.context.new_type::<*const ()>();
+                    let a_ptr = self.bitcast(a, void_ptr_type);
+                    let b_ptr = self.bitcast(b, void_ptr_type);
+                    let n = self.context.new_cast(
+                        None,
+                        self.const_usize(layout.size().bytes()),
+                        self.sizet_type,
+                    );
+                    let builtin = self.context.get_builtin_function("memcmp");
+                    let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
+                    self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
+                }
+            }
+
+            sym::compare_bytes => {
+                let a = args[0].immediate();
+                let b = args[1].immediate();
+                let n = args[2].immediate();
+
+                let void_ptr_type = self.context.new_type::<*const ()>();
+                let a_ptr = self.bitcast(a, void_ptr_type);
+                let b_ptr = self.bitcast(b, void_ptr_type);
+
+                // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
+                let builtin = self.context.get_builtin_function("memcmp");
+                let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
+                self.sext(cmp, self.type_ix(32))
+            }
+
+            sym::black_box => {
+                args[0].val.store(self, result);
+
+                let block = self.llbb();
+                let extended_asm = block.add_extended_asm(None, "");
+                extended_asm.add_input_operand(None, "r", result.llval);
+                extended_asm.add_clobber("memory");
+                extended_asm.set_volatile_flag(true);
+
+                // We have copied the value to `result` already.
+                return Ok(());
+            }
+
+            sym::ptr_mask => {
+                let usize_type = self.context.new_type::<usize>();
+                let void_ptr_type = self.context.new_type::<*const ()>();
+
+                let ptr = args[0].immediate();
+                let mask = args[1].immediate();
+
+                let addr = self.bitcast(ptr, usize_type);
+                let masked = self.and(addr, mask);
+                self.bitcast(masked, void_ptr_type)
+            }
+
+            _ if name_str.starts_with("simd_") => {
+                match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
+                    Ok(llval) => llval,
+                    Err(()) => return Ok(()),
+                }
+            }
+
+            // Fall back to default body
+            _ => return Err(Instance::new(instance.def_id(), instance.args)),
+        };
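Editor's note: of the arms above, `ptr_mask` is the simplest to see end to end: the pointer is bitcast to an integer, ANDed with the mask, and cast back. A plain-Rust sketch of the same computation (illustrative, not the crate's API):

```rust
// What the ptr_mask lowering computes: (ptr as usize & mask) as pointer.
fn ptr_mask(ptr: *const u8, mask: usize) -> *const u8 {
    let addr = ptr as usize;
    (addr & mask) as *const u8
}

fn main() {
    let data = [0u8; 32];
    let p = data.as_ptr().wrapping_add(5);
    // Typical use: align a pointer down to a 4-byte boundary.
    let aligned = ptr_mask(p, !3usize);
    assert_eq!(aligned as usize % 4, 0);
}
```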
} = &fn_abi.ret.mode { let ptr_llty = self.type_ptr_to(ty.gcc_type(self)); let ptr = self.pointercast(result.llval, ptr_llty); self.store(llval, ptr, result.align); - } - else { + } else { OperandRef::from_immediate_or_packed_pair(self, llval, result.layout) .val .store(self, result); @@ -423,11 +441,21 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { } impl<'a, 'gcc, 'tcx> ArgAbiMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { - fn store_fn_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, idx: &mut usize, dst: PlaceRef<'tcx, Self::Value>) { + fn store_fn_arg( + &mut self, + arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, + idx: &mut usize, + dst: PlaceRef<'tcx, Self::Value>, + ) { arg_abi.store_fn_arg(self, idx, dst) } - fn store_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) { + fn store_arg( + &mut self, + arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, + val: RValue<'gcc>, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ) { arg_abi.store(self, val, dst) } @@ -438,8 +466,18 @@ impl<'a, 'gcc, 'tcx> ArgAbiMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { pub trait ArgAbiExt<'gcc, 'tcx> { fn memory_ty(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc>; - fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>); - fn store_fn_arg(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>); + fn store( + &self, + bx: &mut Builder<'_, 'gcc, 'tcx>, + val: RValue<'gcc>, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ); + fn store_fn_arg( + &self, + bx: &mut Builder<'_, 'gcc, 'tcx>, + idx: &mut usize, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ); } impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { @@ -453,17 +491,20 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { /// place for the original Rust type of this argument/return. /// Can be used for both storing formal arguments into Rust variables /// or results of call/invoke instructions into their destinations. - fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) { + fn store( + &self, + bx: &mut Builder<'_, 'gcc, 'tcx>, + val: RValue<'gcc>, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ) { if self.is_ignore() { return; } if self.is_sized_indirect() { OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst) - } - else if self.is_unsized_indirect() { + } else if self.is_unsized_indirect() { bug!("unsized `ArgAbi` must be handled through `store_fn_arg`"); - } - else if let PassMode::Cast { ref cast, .. } = self.mode { + } else if let PassMode::Cast { ref cast, .. } = self.mode { // FIXME(eddyb): Figure out when the simpler Store is safe, clang // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}. let can_store_through_cast_ptr = false; @@ -471,8 +512,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { let cast_ptr_llty = bx.type_ptr_to(cast.gcc_type(bx)); let cast_dst = bx.pointercast(dst.llval, cast_ptr_llty); bx.store(val, cast_dst, self.layout.align.abi); - } - else { + } else { // The actual return type is a struct, but the ABI // adaptation code has cast it into some scalar type. 
The // code that follows is the only reliable way I have @@ -508,35 +548,44 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { bx.lifetime_end(llscratch, scratch_size); } - } - else { + } else { OperandValue::Immediate(val).store(bx, dst); } } - fn store_fn_arg<'a>(&self, bx: &mut Builder<'a, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>) { + fn store_fn_arg<'a>( + &self, + bx: &mut Builder<'a, 'gcc, 'tcx>, + idx: &mut usize, + dst: PlaceRef<'tcx, RValue<'gcc>>, + ) { let mut next = || { let val = bx.current_func().get_param(*idx as i32); *idx += 1; val.to_rvalue() }; match self.mode { - PassMode::Ignore => {}, + PassMode::Ignore => {} PassMode::Pair(..) => { OperandValue::Pair(next(), next()).store(bx, dst); - }, + } PassMode::Indirect { meta_attrs: Some(_), .. } => { OperandValue::Ref(next(), Some(next()), self.layout.align.abi).store(bx, dst); - }, - PassMode::Direct(_) | PassMode::Indirect { meta_attrs: None, .. } | PassMode::Cast { .. } => { + } + PassMode::Direct(_) + | PassMode::Indirect { meta_attrs: None, .. } + | PassMode::Cast { .. } => { let next_arg = next(); self.store(bx, next_arg, dst); - }, + } } } } -fn int_type_width_signed<'gcc, 'tcx>(ty: Ty<'tcx>, cx: &CodegenCx<'gcc, 'tcx>) -> Option<(u64, bool)> { +fn int_type_width_signed<'gcc, 'tcx>( + ty: Ty<'tcx>, + cx: &CodegenCx<'gcc, 'tcx>, +) -> Option<(u64, bool)> { match ty.kind() { ty::Int(t) => Some(( match t { @@ -570,82 +619,76 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let typ = result_type.to_unsigned(self.cx); let value = - if result_type.is_signed(self.cx) { - self.gcc_int_cast(value, typ) - } - else { - value - }; + if result_type.is_signed(self.cx) { self.gcc_int_cast(value, typ) } else { value }; let context = &self.cx.context; - let result = - match width { - 8 | 16 | 32 | 64 => { - let mask = ((1u128 << width) - 1) as u64; - let (m0, m1, m2) = if width > 16 { - ( - context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64), - context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64), - context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64), - ) - } else { - ( - context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32), - context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32), - context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32), - ) - }; - let one = context.new_rvalue_from_int(typ, 1); - let two = context.new_rvalue_from_int(typ, 2); - let four = context.new_rvalue_from_int(typ, 4); - - // First step. - let left = self.lshr(value, one); - let left = self.and(left, m0); - let right = self.and(value, m0); - let right = self.shl(right, one); - let step1 = self.or(left, right); - - // Second step. - let left = self.lshr(step1, two); - let left = self.and(left, m1); - let right = self.and(step1, m1); - let right = self.shl(right, two); - let step2 = self.or(left, right); - - // Third step. - let left = self.lshr(step2, four); - let left = self.and(left, m2); - let right = self.and(step2, m2); - let right = self.shl(right, four); - let step3 = self.or(left, right); - - // Fourth step. - if width == 8 { - step3 - } else { - self.gcc_bswap(step3, width) - } - }, - 128 => { - // TODO(antoyo): find a more efficient implementation? 
- let sixty_four = self.gcc_int(typ, 64); - let right_shift = self.gcc_lshr(value, sixty_four); - let high = self.gcc_int_cast(right_shift, self.u64_type); - let low = self.gcc_int_cast(value, self.u64_type); - - let reversed_high = self.bit_reverse(64, high); - let reversed_low = self.bit_reverse(64, low); - - let new_low = self.gcc_int_cast(reversed_high, typ); - let new_high = self.shl(self.gcc_int_cast(reversed_low, typ), sixty_four); - - self.gcc_or(new_low, new_high) - }, - _ => { - panic!("cannot bit reverse with width = {}", width); - }, - }; + let result = match width { + 8 | 16 | 32 | 64 => { + let mask = ((1u128 << width) - 1) as u64; + let (m0, m1, m2) = if width > 16 { + ( + context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64), + context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64), + context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64), + ) + } else { + ( + context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32), + context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32), + context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32), + ) + }; + let one = context.new_rvalue_from_int(typ, 1); + let two = context.new_rvalue_from_int(typ, 2); + let four = context.new_rvalue_from_int(typ, 4); + + // First step. + let left = self.lshr(value, one); + let left = self.and(left, m0); + let right = self.and(value, m0); + let right = self.shl(right, one); + let step1 = self.or(left, right); + + // Second step. + let left = self.lshr(step1, two); + let left = self.and(left, m1); + let right = self.and(step1, m1); + let right = self.shl(right, two); + let step2 = self.or(left, right); + + // Third step. + let left = self.lshr(step2, four); + let left = self.and(left, m2); + let right = self.and(step2, m2); + let right = self.shl(right, four); + let step3 = self.or(left, right); + + // Fourth step. + if width == 8 { + step3 + } else { + self.gcc_bswap(step3, width) + } + } + 128 => { + // TODO(antoyo): find a more efficient implementation? 
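// Minimal scalar sketches (plain Rust, illustration only) of the two
// strategies `bit_reverse` uses: widths 8..=64 swap adjacent bits, then bit
// pairs, then nibbles, finishing with a byte swap for widths above 8, while
// 128-bit values reverse each 64-bit half and swap the halves.
fn bit_reverse_u8_model(mut v: u8) -> u8 {
    v = ((v >> 1) & 0x55) | ((v & 0x55) << 1); // swap adjacent bits
    v = ((v >> 2) & 0x33) | ((v & 0x33) << 2); // swap bit pairs
    v = ((v >> 4) & 0x0f) | ((v & 0x0f) << 4); // swap nibbles
    v
}

fn bit_reverse_u128_model(x: u128) -> u128 {
    let (high, low) = ((x >> 64) as u64, x as u64);
    // The reversed high half becomes the new low half and vice versa.
    ((low.reverse_bits() as u128) << 64) | (high.reverse_bits() as u128)
}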
+ let sixty_four = self.gcc_int(typ, 64); + let right_shift = self.gcc_lshr(value, sixty_four); + let high = self.gcc_int_cast(right_shift, self.u64_type); + let low = self.gcc_int_cast(value, self.u64_type); + + let reversed_high = self.bit_reverse(64, high); + let reversed_low = self.bit_reverse(64, low); + + let new_low = self.gcc_int_cast(reversed_high, typ); + let new_high = self.shl(self.gcc_int_cast(reversed_low, typ), sixty_four); + + self.gcc_or(new_low, new_high, self.location) + } + _ => { + panic!("cannot bit reverse with width = {}", width); + } + }; self.gcc_int_cast(result, result_type) } @@ -685,56 +728,54 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let first_elem = self.context.new_array_access(None, result, zero); let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type); self.llbb() - .add_assignment(None, first_elem, first_value); + .add_assignment(self.location, first_elem, first_value); - let second_elem = self.context.new_array_access(None, result, one); - let cast = self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), arg_type); + let second_elem = self.context.new_array_access(self.location, result, one); + let cast = self.gcc_int_cast(self.context.new_call(self.location, clzll, &[low]), arg_type); let second_value = self.add(cast, sixty_four); self.llbb() - .add_assignment(None, second_elem, second_value); + .add_assignment(self.location, second_elem, second_value); - let third_elem = self.context.new_array_access(None, result, two); + let third_elem = self.context.new_array_access(self.location, result, two); let third_value = self.const_uint(arg_type, 128); self.llbb() - .add_assignment(None, third_elem, third_value); + .add_assignment(self.location, third_elem, third_value); - let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, high); - let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low); + let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high); + let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low); let not_low_and_not_high = not_low & not_high; let index = not_high + not_low_and_not_high; // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in // gcc. // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the // compilation stage. 
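// Scalar sketch (plain Rust) of the branchless selection built above for the
// 128-bit leading-zero count: compute all three candidate answers, then pick
// one with an index derived from the zero-ness of each half.
fn clz_u128_model(x: u128) -> u32 {
    let (high, low) = ((x >> 64) as u64, x as u64);
    let candidates = [high.leading_zeros(), low.leading_zeros() + 64, 128];
    let not_high = (high == 0) as usize; // logical negation: 0 or 1
    let not_low = (low == 0) as usize;
    // index 0: high has set bits; 1: only low does; 2: x == 0
    candidates[not_high + (not_low & not_high)]
}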
- let index = self.context.new_cast(None, index, self.i32_type); + let index = self.context.new_cast(self.location, index, self.i32_type); - let res = self.context.new_array_access(None, result, index); + let res = self.context.new_array_access(self.location, result, index); return self.gcc_int_cast(res.to_rvalue(), arg_type); } else { let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll"); - let arg = self.context.new_cast(None, arg, self.ulonglong_type); + let arg = self.context.new_cast(self.location, arg, self.ulonglong_type); let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64; let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8); - let res = self.context.new_call(None, count_leading_zeroes, &[arg]) - diff; - return self.context.new_cast(None, res, arg_type); + let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff; + return self.context.new_cast(self.location, res, arg_type); }; let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes); - let res = self.context.new_call(None, count_leading_zeroes, &[arg]); - self.context.new_cast(None, res, arg_type) + let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]); + self.context.new_cast(self.location, res, arg_type) } fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { let result_type = arg.get_type(); - let arg = - if result_type.is_signed(self.cx) { - let new_type = result_type.to_unsigned(self.cx); - self.gcc_int_cast(arg, new_type) - } - else { - arg - }; + let arg = if result_type.is_signed(self.cx) { + let new_type = result_type.to_unsigned(self.cx); + self.gcc_int_cast(arg, new_type) + } else { + arg + }; let arg_type = arg.get_type(); let (count_trailing_zeroes, expected_type) = // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here @@ -766,58 +807,56 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let ctzll = self.context.get_builtin_function("__builtin_ctzll"); - let first_elem = self.context.new_array_access(None, result, zero); - let first_value = self.gcc_int_cast(self.context.new_call(None, ctzll, &[low]), arg_type); + let first_elem = self.context.new_array_access(self.location, result, zero); + let first_value = self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[low]), arg_type); self.llbb() - .add_assignment(None, first_elem, first_value); + .add_assignment(self.location, first_elem, first_value); - let second_elem = self.context.new_array_access(None, result, one); - let second_value = self.gcc_add(self.gcc_int_cast(self.context.new_call(None, ctzll, &[high]), arg_type), sixty_four); + let second_elem = self.context.new_array_access(self.location, result, one); + let second_value = self.gcc_add(self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[high]), arg_type), sixty_four); self.llbb() - .add_assignment(None, second_elem, second_value); + .add_assignment(self.location, second_elem, second_value); - let third_elem = self.context.new_array_access(None, result, two); + let third_elem = self.context.new_array_access(self.location, result, two); let third_value = self.gcc_int(arg_type, 128); self.llbb() - .add_assignment(None, third_elem, third_value); + .add_assignment(self.location, third_elem, third_value); - let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low); - let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, 
high); + let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low); + let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high); let not_low_and_not_high = not_low & not_high; let index = not_low + not_low_and_not_high; // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in // gcc. // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the // compilation stage. - let index = self.context.new_cast(None, index, self.i32_type); + let index = self.context.new_cast(self.location, index, self.i32_type); - let res = self.context.new_array_access(None, result, index); + let res = self.context.new_array_access(self.location, result, index); return self.gcc_int_cast(res.to_rvalue(), result_type); } else { let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll"); let arg_size = arg_type.get_size(); - let casted_arg = self.context.new_cast(None, arg, self.ulonglong_type); + let casted_arg = self.context.new_cast(self.location, arg, self.ulonglong_type); let byte_diff = self.ulonglong_type.get_size() as i64 - arg_size as i64; let diff = self.context.new_rvalue_from_long(self.int_type, byte_diff * 8); let mask = self.context.new_rvalue_from_long(arg_type, -1); // To get the value with all bits set. - let masked = mask & self.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, arg); - let cond = self.context.new_comparison(None, ComparisonOp::Equals, masked, mask); - let diff = diff * self.context.new_cast(None, cond, self.int_type); - let res = self.context.new_call(None, count_trailing_zeroes, &[casted_arg]) - diff; - return self.context.new_cast(None, res, result_type); + let masked = mask & self.context.new_unary_op(self.location, UnaryOp::BitwiseNegate, arg_type, arg); + let cond = self.context.new_comparison(self.location, ComparisonOp::Equals, masked, mask); + let diff = diff * self.context.new_cast(self.location, cond, self.int_type); + let res = self.context.new_call(self.location, count_trailing_zeroes, &[casted_arg]) - diff; + return self.context.new_cast(self.location, res, result_type); }; let count_trailing_zeroes = self.context.get_builtin_function(count_trailing_zeroes); - let arg = - if arg_type != expected_type { - self.context.new_cast(None, arg, expected_type) - } - else { - arg - }; - let res = self.context.new_call(None, count_trailing_zeroes, &[arg]); - self.context.new_cast(None, res, result_type) + let arg = if arg_type != expected_type { + self.context.new_cast(self.location, arg, expected_type) + } else { + arg + }; + let res = self.context.new_call(self.location, count_trailing_zeroes, &[arg]); + self.context.new_cast(self.location, res, result_type) } fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> { @@ -825,13 +864,11 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let result_type = value.get_type(); let value_type = result_type.to_unsigned(self.cx); - let value = - if result_type.is_signed(self.cx) { - self.gcc_int_cast(value, value_type) - } - else { - value - }; + let value = if result_type.is_signed(self.cx) { + self.gcc_int_cast(value, value_type) + } else { + value + }; // only break apart 128-bit ints if they're not natively supported // TODO(antoyo): remove this if/when native 128-bit integers land in libgccjit @@ -859,8 +896,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let counter = self.current_func().new_local(None, counter_type, "popcount_counter"); let val = 
self.current_func().new_local(None, value_type, "popcount_value"); let zero = self.gcc_zero(counter_type); - self.llbb().add_assignment(None, counter, zero); - self.llbb().add_assignment(None, val, value); + self.llbb().add_assignment(self.location, counter, zero); + self.llbb().add_assignment(self.location, val, value); self.br(loop_head); // check if value isn't zero @@ -874,12 +911,12 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let one = self.gcc_int(value_type, 1); let sub = self.gcc_sub(val.to_rvalue(), one); let op = self.gcc_and(val.to_rvalue(), sub); - loop_body.add_assignment(None, val, op); + loop_body.add_assignment(self.location, val, op); // counter += 1 let one = self.gcc_int(counter_type, 1); let op = self.gcc_add(counter.to_rvalue(), one); - loop_body.add_assignment(None, counter, op); + loop_body.add_assignment(self.location, counter, op); self.br(loop_head); // end of loop @@ -888,66 +925,70 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } // Algorithm from: https://blog.regehr.org/archives/1063 - fn rotate_left(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> { + fn rotate_left( + &mut self, + value: RValue<'gcc>, + shift: RValue<'gcc>, + width: u64, + ) -> RValue<'gcc> { let max = self.const_uint(shift.get_type(), width); let shift = self.urem(shift, max); let lhs = self.shl(value, shift); let result_neg = self.neg(shift); - let result_and = - self.and( - result_neg, - self.const_uint(shift.get_type(), width - 1), - ); + let result_and = self.and(result_neg, self.const_uint(shift.get_type(), width - 1)); let rhs = self.lshr(value, result_and); self.or(lhs, rhs) } // Algorithm from: https://blog.regehr.org/archives/1063 - fn rotate_right(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> { + fn rotate_right( + &mut self, + value: RValue<'gcc>, + shift: RValue<'gcc>, + width: u64, + ) -> RValue<'gcc> { let max = self.const_uint(shift.get_type(), width); let shift = self.urem(shift, max); let lhs = self.lshr(value, shift); let result_neg = self.neg(shift); - let result_and = - self.and( - result_neg, - self.const_uint(shift.get_type(), width - 1), - ); + let result_and = self.and(result_neg, self.const_uint(shift.get_type(), width - 1)); let rhs = self.shl(value, result_and); self.or(lhs, rhs) } - fn saturating_add(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> { + fn saturating_add( + &mut self, + lhs: RValue<'gcc>, + rhs: RValue<'gcc>, + signed: bool, + width: u64, + ) -> RValue<'gcc> { let result_type = lhs.get_type(); if signed { // Based on algorithm from: https://stackoverflow.com/a/56531252/389119 let func = self.current_func.borrow().expect("func"); - let res = func.new_local(None, result_type, "saturating_sum"); + let res = func.new_local(self.location, result_type, "saturating_sum"); let supports_native_type = self.is_native_int_type(result_type); - let overflow = - if supports_native_type { - let func_name = - match width { - 8 => "__builtin_add_overflow", - 16 => "__builtin_add_overflow", - 32 => "__builtin_sadd_overflow", - 64 => "__builtin_saddll_overflow", - 128 => "__builtin_add_overflow", - _ => unreachable!(), - }; - let overflow_func = self.context.get_builtin_function(func_name); - self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None) - } - else { - let func_name = - match width { - 128 => "__rust_i128_addo", - _ => unreachable!(), - }; - let (int_result, overflow) = self.operation_with_overflow(func_name, lhs, rhs); - 
self.llbb().add_assignment(None, res, int_result); - overflow + let overflow = if supports_native_type { + let func_name = match width { + 8 => "__builtin_add_overflow", + 16 => "__builtin_add_overflow", + 32 => "__builtin_sadd_overflow", + 64 => "__builtin_saddll_overflow", + 128 => "__builtin_add_overflow", + _ => unreachable!(), + }; + let overflow_func = self.context.get_builtin_function(func_name); + self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(self.location)], None) + } else { + let func_name = match width { + 128 => "__rust_i128_addo", + _ => unreachable!(), }; + let (int_result, overflow) = self.operation_with_overflow(func_name, lhs, rhs); + self.llbb().add_assignment(self.location, res, int_result); + overflow + }; let then_block = func.new_block("then"); let after_block = func.new_block("after"); @@ -955,83 +996,97 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // Return `result_type`'s maximum or minimum value on overflow // NOTE: convert the type to unsigned to have an unsigned shift. let unsigned_type = result_type.to_unsigned(&self.cx); - let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1)); + let shifted = self.gcc_lshr( + self.gcc_int_cast(lhs, unsigned_type), + self.gcc_int(unsigned_type, width as i64 - 1), + ); let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0)); let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1)); - then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type)); - then_block.end_with_jump(None, after_block); + then_block.add_assignment( + self.location, + res, + self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type), + ); + then_block.end_with_jump(self.location, after_block); - self.llbb().end_with_conditional(None, overflow, then_block, after_block); + self.llbb().end_with_conditional(self.location, overflow, then_block, after_block); // NOTE: since jumps were added in a place rustc does not // expect, the current block in the state need to be updated. 
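// The unsigned fallback a few lines below relies on a branch-free trick;
// here it is as a standalone sketch in plain Rust (u32 chosen arbitrarily):
fn saturating_add_u32_model(lhs: u32, rhs: u32) -> u32 {
    let res = lhs.wrapping_add(rhs);
    // If the addition wrapped, `res < lhs` holds; negating the comparison
    // result gives an all-ones mask that ORs the sum up to u32::MAX.
    res | 0u32.wrapping_sub((res < lhs) as u32)
}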
self.switch_to_block(after_block); res.to_rvalue() - } - else { + } else { // Algorithm from: http://locklessinc.com/articles/sat_arithmetic/ let res = self.gcc_add(lhs, rhs); let cond = self.gcc_icmp(IntPredicate::IntULT, res, lhs); let value = self.gcc_neg(self.gcc_int_cast(cond, result_type)); - self.gcc_or(res, value) + self.gcc_or(res, value, self.location) } } // Algorithm from: https://locklessinc.com/articles/sat_arithmetic/ - fn saturating_sub(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> { + fn saturating_sub( + &mut self, + lhs: RValue<'gcc>, + rhs: RValue<'gcc>, + signed: bool, + width: u64, + ) -> RValue<'gcc> { let result_type = lhs.get_type(); if signed { // Based on algorithm from: https://stackoverflow.com/a/56531252/389119 let func = self.current_func.borrow().expect("func"); - let res = func.new_local(None, result_type, "saturating_diff"); + let res = func.new_local(self.location, result_type, "saturating_diff"); let supports_native_type = self.is_native_int_type(result_type); - let overflow = - if supports_native_type { - let func_name = - match width { - 8 => "__builtin_sub_overflow", - 16 => "__builtin_sub_overflow", - 32 => "__builtin_ssub_overflow", - 64 => "__builtin_ssubll_overflow", - 128 => "__builtin_sub_overflow", - _ => unreachable!(), - }; - let overflow_func = self.context.get_builtin_function(func_name); - self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None) - } - else { - let func_name = - match width { - 128 => "__rust_i128_subo", - _ => unreachable!(), - }; - let (int_result, overflow) = self.operation_with_overflow(func_name, lhs, rhs); - self.llbb().add_assignment(None, res, int_result); - overflow + let overflow = if supports_native_type { + let func_name = match width { + 8 => "__builtin_sub_overflow", + 16 => "__builtin_sub_overflow", + 32 => "__builtin_ssub_overflow", + 64 => "__builtin_ssubll_overflow", + 128 => "__builtin_sub_overflow", + _ => unreachable!(), }; + let overflow_func = self.context.get_builtin_function(func_name); + self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(self.location)], None) + } else { + let func_name = match width { + 128 => "__rust_i128_subo", + _ => unreachable!(), + }; + let (int_result, overflow) = self.operation_with_overflow(func_name, lhs, rhs); + self.llbb().add_assignment(self.location, res, int_result); + overflow + }; let then_block = func.new_block("then"); let after_block = func.new_block("after"); // Return `result_type`'s maximum or minimum value on overflow // NOTE: convert the type to unsigned to have an unsigned shift. 
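// Sketch (plain Rust) of the saturation bound computed below: shifting the
// unsigned reinterpretation of `lhs` right by width - 1 yields 0 for a
// non-negative lhs and 1 for a negative one, so adding INT_MAX produces the
// type's maximum or minimum respectively.
fn saturation_bound_i32_model(lhs: i32) -> i32 {
    let shifted = (lhs as u32) >> 31;
    shifted.wrapping_add(i32::MAX as u32) as i32 // 0x7fffffff or 0x80000000
}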
- let unsigned_type = result_type.to_unsigned(&self.cx); - let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1)); + let unsigned_type = result_type.to_unsigned(self.cx); + let shifted = self.gcc_lshr( + self.gcc_int_cast(lhs, unsigned_type), + self.gcc_int(unsigned_type, width as i64 - 1), + ); let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0)); let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1)); - then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type)); - then_block.end_with_jump(None, after_block); + then_block.add_assignment( + self.location, + res, + self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type), + ); + then_block.end_with_jump(self.location, after_block); - self.llbb().end_with_conditional(None, overflow, then_block, after_block); + self.llbb().end_with_conditional(self.location, overflow, then_block, after_block); // NOTE: since jumps were added in a place rustc does not // expect, the current block in the state need to be updated. self.switch_to_block(after_block); res.to_rvalue() - } - else { + } else { let res = self.gcc_sub(lhs, rhs); let comparison = self.gcc_icmp(IntPredicate::IntULE, res, lhs); let value = self.gcc_neg(self.gcc_int_cast(comparison, result_type)); @@ -1040,21 +1095,26 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } } -fn try_intrinsic<'a, 'b, 'gcc, 'tcx>(bx: &'b mut Builder<'a, 'gcc, 'tcx>, try_func: RValue<'gcc>, data: RValue<'gcc>, _catch_func: RValue<'gcc>, dest: RValue<'gcc>) { +fn try_intrinsic<'a, 'b, 'gcc, 'tcx>( + bx: &'b mut Builder<'a, 'gcc, 'tcx>, + try_func: RValue<'gcc>, + data: RValue<'gcc>, + _catch_func: RValue<'gcc>, + dest: RValue<'gcc>, +) { if bx.sess().panic_strategy() == PanicStrategy::Abort { bx.call(bx.type_void(), None, None, try_func, &[data], None); // Return 0 unconditionally from the intrinsic call; // we can never unwind. let ret_align = bx.tcx.data_layout.i32_align.abi; bx.store(bx.const_i32(0), dest, ret_align); - } - else if wants_msvc_seh(bx.sess()) { - unimplemented!(); - } - else { - #[cfg(feature="master")] + } else { + if wants_msvc_seh(bx.sess()) { + unimplemented!(); + } + #[cfg(feature = "master")] codegen_gnu_try(bx, try_func, data, _catch_func, dest); - #[cfg(not(feature="master"))] + #[cfg(not(feature = "master"))] unimplemented!(); } } @@ -1070,8 +1130,14 @@ fn try_intrinsic<'a, 'b, 'gcc, 'tcx>(bx: &'b mut Builder<'a, 'gcc, 'tcx>, try_fu // function calling it, and that function may already have other personality // functions in play. By calling a shim we're guaranteed that our shim will have // the right personality function. 
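// Under `-C panic=abort`, the `try` intrinsic handled above reduces to a
// plain call, sketched here as a hypothetical plain-Rust model: the result
// slot is unconditionally 0 because unwinding cannot occur.
unsafe fn try_abort_model(try_func: unsafe fn(*mut i8), data: *mut i8) -> i32 {
    unsafe { try_func(data) }; // any unwind would abort instead of returning
    0 // 0 means "no exception was caught"
}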
-#[cfg(feature="master")] -fn codegen_gnu_try<'gcc>(bx: &mut Builder<'_, 'gcc, '_>, try_func: RValue<'gcc>, data: RValue<'gcc>, catch_func: RValue<'gcc>, dest: RValue<'gcc>) { +#[cfg(feature = "master")] +fn codegen_gnu_try<'gcc>( + bx: &mut Builder<'_, 'gcc, '_>, + try_func: RValue<'gcc>, + data: RValue<'gcc>, + catch_func: RValue<'gcc>, + dest: RValue<'gcc>, +) { let cx: &CodegenCx<'gcc, '_> = bx.cx; let (llty, func) = get_rust_try_fn(cx, &mut |mut bx| { // Codegens the shims described above: @@ -1095,7 +1161,7 @@ fn codegen_gnu_try<'gcc>(bx: &mut Builder<'_, 'gcc, '_>, try_func: RValue<'gcc>, let catch_func = func.get_param(2).to_rvalue(); let try_func_ty = bx.type_func(&[bx.type_i8p()], bx.type_void()); - let current_block = bx.block.clone(); + let current_block = bx.block; bx.switch_to_block(then); bx.ret(bx.const_i32(0)); @@ -1130,36 +1196,44 @@ fn codegen_gnu_try<'gcc>(bx: &mut Builder<'_, 'gcc, '_>, try_func: RValue<'gcc>, bx.store(ret, dest, i32_align); } - // Helper function used to get a handle to the `__rust_try` function used to // catch exceptions. // // This function is only generated once and is then cached. -#[cfg(feature="master")] -fn get_rust_try_fn<'a, 'gcc, 'tcx>(cx: &'a CodegenCx<'gcc, 'tcx>, codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>)) -> (Type<'gcc>, Function<'gcc>) { +#[cfg(feature = "master")] +fn get_rust_try_fn<'a, 'gcc, 'tcx>( + cx: &'a CodegenCx<'gcc, 'tcx>, + codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>), +) -> (Type<'gcc>, Function<'gcc>) { if let Some(llfn) = cx.rust_try_fn.get() { return llfn; } // Define the type up front for the signature of the rust_try function. let tcx = cx.tcx; - let i8p = Ty::new_mut_ptr(tcx,tcx.types.i8); + let i8p = Ty::new_mut_ptr(tcx, tcx.types.i8); // `unsafe fn(*mut i8) -> ()` - let try_fn_ty = Ty::new_fn_ptr(tcx,ty::Binder::dummy(tcx.mk_fn_sig( - iter::once(i8p), - Ty::new_unit(tcx,), - false, - rustc_hir::Unsafety::Unsafe, - Abi::Rust, - ))); + let try_fn_ty = Ty::new_fn_ptr( + tcx, + ty::Binder::dummy(tcx.mk_fn_sig( + iter::once(i8p), + Ty::new_unit(tcx), + false, + rustc_hir::Unsafety::Unsafe, + Abi::Rust, + )), + ); // `unsafe fn(*mut i8, *mut i8) -> ()` - let catch_fn_ty = Ty::new_fn_ptr(tcx,ty::Binder::dummy(tcx.mk_fn_sig( - [i8p, i8p].iter().cloned(), - Ty::new_unit(tcx,), - false, - rustc_hir::Unsafety::Unsafe, - Abi::Rust, - ))); + let catch_fn_ty = Ty::new_fn_ptr( + tcx, + ty::Binder::dummy(tcx.mk_fn_sig( + [i8p, i8p].iter().cloned(), + Ty::new_unit(tcx), + false, + rustc_hir::Unsafety::Unsafe, + Abi::Rust, + )), + ); // `unsafe fn(unsafe fn(*mut i8) -> (), *mut i8, unsafe fn(*mut i8, *mut i8) -> ()) -> i32` let rust_fn_sig = ty::Binder::dummy(cx.tcx.mk_fn_sig( [try_fn_ty, i8p, catch_fn_ty], @@ -1175,8 +1249,13 @@ fn get_rust_try_fn<'a, 'gcc, 'tcx>(cx: &'a CodegenCx<'gcc, 'tcx>, codegen: &mut // Helper function to give a Block to a closure to codegen a shim function. // This is currently primarily used for the `try` intrinsic functions above. 
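// For reference, the Rust-level signature assembled above, written out as
// type aliases (a sketch; these alias names do not appear in the source):
type TryFnModel = unsafe fn(*mut i8);
type CatchFnModel = unsafe fn(*mut i8, *mut i8);
type RustTryModel = unsafe fn(TryFnModel, *mut i8, CatchFnModel) -> i32;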
-#[cfg(feature="master")] -fn gen_fn<'a, 'gcc, 'tcx>(cx: &'a CodegenCx<'gcc, 'tcx>, name: &str, rust_fn_sig: ty::PolyFnSig<'tcx>, codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>)) -> (Type<'gcc>, Function<'gcc>) { +#[cfg(feature = "master")] +fn gen_fn<'a, 'gcc, 'tcx>( + cx: &'a CodegenCx<'gcc, 'tcx>, + name: &str, + rust_fn_sig: ty::PolyFnSig<'tcx>, + codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>), +) -> (Type<'gcc>, Function<'gcc>) { let fn_abi = cx.fn_abi_of_fn_ptr(rust_fn_sig, ty::List::empty()); let return_type = fn_abi.gcc_type(cx).return_type; // FIXME(eddyb) find a nicer way to do this. diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs index d8091724d86..e9af34059a0 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs @@ -1,3 +1,5 @@ +use std::iter::FromIterator; + use gccjit::ToRValue; use gccjit::{BinaryOp, RValue, Type}; #[cfg(feature = "master")] @@ -19,6 +21,8 @@ use rustc_span::{sym, Span, Symbol}; use rustc_target::abi::Align; use crate::builder::Builder; +#[cfg(not(feature = "master"))] +use crate::common::SignType; #[cfg(feature = "master")] use crate::context::CodegenCx; @@ -156,6 +160,197 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( return Ok(compare_simd_types(bx, arg1, arg2, in_elem, llret_ty, cmp_op)); } + let simd_bswap = |bx: &mut Builder<'a, 'gcc, 'tcx>, vector: RValue<'gcc>| -> RValue<'gcc> { + let v_type = vector.get_type(); + let vector_type = v_type.unqualified().dyncast_vector().expect("vector type"); + let elem_type = vector_type.get_element_type(); + let elem_size_bytes = elem_type.get_size(); + if elem_size_bytes == 1 { + return vector; + } + + let type_size_bytes = elem_size_bytes as u64 * in_len; + let shuffle_indices = Vec::from_iter(0..type_size_bytes); + let byte_vector_type = bx.context.new_vector_type(bx.type_u8(), type_size_bytes); + let byte_vector = bx.context.new_bitcast(None, args[0].immediate(), byte_vector_type); + + #[cfg(not(feature = "master"))] + let shuffled = { + let new_elements: Vec<_> = shuffle_indices + .chunks_exact(elem_size_bytes as _) + .flat_map(|x| x.iter().rev()) + .map(|&i| { + let index = bx.context.new_rvalue_from_long(bx.u64_type, i as _); + bx.extract_element(byte_vector, index) + }) + .collect(); + + bx.context.new_rvalue_from_vector(None, byte_vector_type, &new_elements) + }; + #[cfg(feature = "master")] + let shuffled = { + let indices: Vec<_> = shuffle_indices + .chunks_exact(elem_size_bytes as _) + .flat_map(|x| x.iter().rev()) + .map(|&i| bx.context.new_rvalue_from_int(bx.u8_type, i as _)) + .collect(); + + let mask = bx.context.new_rvalue_from_vector(None, byte_vector_type, &indices); + bx.context.new_rvalue_vector_perm(None, byte_vector, byte_vector, mask) + }; + bx.context.new_bitcast(None, shuffled, v_type) + }; + + if name == sym::simd_bswap || name == sym::simd_bitreverse { + require!( + bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer, + InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem } + ); + } + + if name == sym::simd_bswap { + return Ok(simd_bswap(bx, args[0].immediate())); + } + + // We use a different algorithm from non-vector bitreverse to take advantage of most + // processors' vector shuffle units. It works like this: + // 1. Generate pre-reversed low and high nibbles as a vector. + // 2. Byte-swap the input. + // 3. Mask off the low and high nibbles of each byte in the byte-swapped input. + // 4. 
Shuffle the pre-reversed low and high-nibbles using the masked nibbles as a shuffle mask. + // 5. Combine the results of the shuffle back together and cast back to the original type. + #[cfg(feature = "master")] + if name == sym::simd_bitreverse { + let vector = args[0].immediate(); + let v_type = vector.get_type(); + let vector_type = v_type.unqualified().dyncast_vector().expect("vector type"); + let elem_type = vector_type.get_element_type(); + let elem_size_bytes = elem_type.get_size(); + + let type_size_bytes = elem_size_bytes as u64 * in_len; + // We need to ensure at least 16 entries in our vector type, since the pre-reversed vectors + // we generate below have 16 entries in them. `new_rvalue_vector_perm` requires the mask + // vector to be of the same length as the source vectors. + let byte_vector_type_size = type_size_bytes.max(16); + + let byte_vector_type = bx.context.new_vector_type(bx.u8_type, type_size_bytes); + let long_byte_vector_type = bx.context.new_vector_type(bx.u8_type, byte_vector_type_size); + + // Step 1: Generate pre-reversed low and high nibbles as a vector. + let zero_byte = bx.context.new_rvalue_zero(bx.u8_type); + let hi_nibble_elements: Vec<_> = (0u8..16) + .map(|x| bx.context.new_rvalue_from_int(bx.u8_type, x.reverse_bits() as _)) + .chain((16..byte_vector_type_size).map(|_| zero_byte)) + .collect(); + let hi_nibble = + bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &hi_nibble_elements); + + let lo_nibble_elements: Vec<_> = (0u8..16) + .map(|x| bx.context.new_rvalue_from_int(bx.u8_type, (x.reverse_bits() >> 4) as _)) + .chain((16..byte_vector_type_size).map(|_| zero_byte)) + .collect(); + let lo_nibble = + bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &lo_nibble_elements); + + let mask = bx.context.new_rvalue_from_vector( + None, + long_byte_vector_type, + &vec![bx.context.new_rvalue_from_int(bx.u8_type, 0x0f); byte_vector_type_size as _], + ); + + let four_vec = bx.context.new_rvalue_from_vector( + None, + long_byte_vector_type, + &vec![bx.context.new_rvalue_from_int(bx.u8_type, 4); byte_vector_type_size as _], + ); + + // Step 2: Byte-swap the input. + let swapped = simd_bswap(bx, args[0].immediate()); + let byte_vector = bx.context.new_bitcast(None, swapped, byte_vector_type); + + // We're going to need to extend the vector with zeros to make sure that the types are the + // same, since that's what new_rvalue_vector_perm expects. + let byte_vector = if byte_vector_type_size > type_size_bytes { + let mut byte_vector_elements = Vec::with_capacity(byte_vector_type_size as _); + for i in 0..type_size_bytes { + let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _); + let val = bx.extract_element(byte_vector, idx); + byte_vector_elements.push(val); + } + for _ in type_size_bytes..byte_vector_type_size { + byte_vector_elements.push(zero_byte); + } + bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &byte_vector_elements) + } else { + bx.context.new_bitcast(None, byte_vector, long_byte_vector_type) + }; + + // Step 3: Mask off the low and high nibbles of each byte in the byte-swapped input. + let masked_hi = (byte_vector >> four_vec) & mask; + let masked_lo = byte_vector & mask; + + // Step 4: Shuffle the pre-reversed low and high-nibbles using the masked nibbles as a shuffle mask. 
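// Scalar model of the table lookup performed by the two shuffles below
// (plain-Rust sketch): each byte is split into nibbles, every nibble is
// replaced by its pre-reversed value from a 16-entry table, and the nibble
// positions are swapped.
fn bitreverse_byte_model(b: u8) -> u8 {
    const REV: [u8; 16] = [
        0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
        0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
    ];
    (REV[(b & 0x0f) as usize] << 4) | REV[(b >> 4) as usize]
}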
+ let hi = bx.context.new_rvalue_vector_perm(None, hi_nibble, hi_nibble, masked_lo); + let lo = bx.context.new_rvalue_vector_perm(None, lo_nibble, lo_nibble, masked_hi); + + // Step 5: Combine the results of the shuffle back together and cast back to the original type. + let result = hi | lo; + let cast_ty = + bx.context.new_vector_type(elem_type, byte_vector_type_size / (elem_size_bytes as u64)); + + // we might need to truncate if sizeof(v_type) < sizeof(cast_type) + if type_size_bytes < byte_vector_type_size { + let cast_result = bx.context.new_bitcast(None, result, cast_ty); + let elems: Vec<_> = (0..in_len) + .map(|i| { + let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _); + bx.extract_element(cast_result, idx) + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(None, v_type, &elems)); + } else { + // avoid the unnecessary truncation as an optimization. + return Ok(bx.context.new_bitcast(None, result, v_type)); + } + } + // since gcc doesn't have vector shuffle methods available in non-patched builds, fallback to + // component-wise bitreverses if they're not available. + #[cfg(not(feature = "master"))] + if name == sym::simd_bitreverse { + let vector = args[0].immediate(); + let vector_ty = vector.get_type(); + let vector_type = vector_ty.unqualified().dyncast_vector().expect("vector type"); + let num_elements = vector_type.get_num_units(); + + let elem_type = vector_type.get_element_type(); + let elem_size_bytes = elem_type.get_size(); + let num_type = elem_type.to_unsigned(bx.cx); + let new_elements: Vec<_> = (0..num_elements) + .map(|idx| { + let index = bx.context.new_rvalue_from_long(num_type, idx as _); + let extracted_value = bx.extract_element(vector, index).to_rvalue(); + bx.bit_reverse(elem_size_bytes as u64 * 8, extracted_value) + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(None, vector_ty, &new_elements)); + } + + if name == sym::simd_ctlz || name == sym::simd_cttz { + let vector = args[0].immediate(); + let elements: Vec<_> = (0..in_len) + .map(|i| { + let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64); + let value = bx.extract_element(vector, index).to_rvalue(); + if name == sym::simd_ctlz { + bx.count_leading_zeroes(value.get_type().get_size() as u64 * 8, value) + } else { + bx.count_trailing_zeroes(value.get_type().get_size() as u64 * 8, value) + } + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(None, vector.get_type(), &elements)); + } + if name == sym::simd_shuffle { // Make sure this is actually an array, since typeck only checks the length-suffixed // version of this intrinsic. 
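// The element-wise `simd_ctlz`/`simd_cttz` fallback above, modeled in plain
// Rust (sketch with a fixed four-lane u32 vector; the real code iterates
// over whatever lane count and element width the input type has):
fn simd_ctlz_model(v: [u32; 4]) -> [u32; 4] {
    let mut out = [0u32; 4];
    for (o, x) in out.iter_mut().zip(v) {
        *o = x.leading_zeros(); // `simd_cttz` would use x.trailing_zeros()
    }
    out
}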
@@ -504,20 +699,15 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
default: RValue<'gcc>,
pointers: RValue<'gcc>,
mask: RValue<'gcc>,
- pointer_count: usize,
bx: &mut Builder<'a, 'gcc, 'tcx>,
in_len: u64,
- underlying_ty: Ty<'tcx>,
invert: bool,
) -> RValue<'gcc> {
- let vector_type = if pointer_count > 1 {
- bx.context.new_vector_type(bx.usize_type, in_len)
- } else {
- vector_ty(bx, underlying_ty, in_len)
- };
- let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();
+ let vector_type = default.get_type();
+ let elem_type =
+ vector_type.unqualified().dyncast_vector().expect("vector type").get_element_type();
- let mut values = vec![];
+ let mut values = Vec::with_capacity(in_len as usize);
for i in 0..in_len {
let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
let int = bx.context.new_vector_access(None, pointers, index).to_rvalue();
@@ -530,13 +720,15 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
let vector = bx.context.new_rvalue_from_vector(None, vector_type, &values);
- let mut mask_types = vec![];
- let mut mask_values = vec![];
+ let mut mask_types = Vec::with_capacity(in_len as usize);
+ let mut mask_values = Vec::with_capacity(in_len as usize);
for i in 0..in_len {
let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
mask_types.push(bx.context.new_field(None, bx.i32_type, "m"));
let mask_value = bx.context.new_vector_access(None, mask, index).to_rvalue();
- let masked = bx.context.new_rvalue_from_int(bx.i32_type, in_len as i32) & mask_value;
+ let mask_value_cast = bx.context.new_cast(None, mask_value, bx.i32_type);
+ let masked =
+ bx.context.new_rvalue_from_int(bx.i32_type, in_len as i32) & mask_value_cast;
let value = index + masked;
mask_values.push(value);
}
@@ -665,10 +857,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
args[0].immediate(),
args[1].immediate(),
args[2].immediate(),
- pointer_count,
bx,
in_len,
- underlying_ty,
false,
));
}
@@ -779,16 +969,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
}
}
- let result = gather(
- args[0].immediate(),
- args[1].immediate(),
- args[2].immediate(),
- pointer_count,
- bx,
- in_len,
- underlying_ty,
- true,
- );
+ let result =
+ gather(args[0].immediate(), args[1].immediate(), args[2].immediate(), bx, in_len, true);
let pointers = args[1].immediate();
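// Semantic model (scalar plain-Rust sketch) of the masked gather built in
// the hunk above: lane i loads through pointers[i] when mask[i] is set,
// otherwise the lane keeps its value from `default` (the real code can also
// invert the mask sense via the `invert` flag).
unsafe fn gather_model<T: Copy>(
    default: &[T],
    pointers: &[*const T],
    mask: &[bool],
) -> Vec<T> {
    default
        .iter()
        .zip(pointers)
        .zip(mask)
        .map(|((&d, &p), &m)| if m { unsafe { *p } } else { d })
        .collect()
}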
