diff options
| author | Antoni Boucher <bouanto@zoho.com> | 2024-09-24 12:38:01 -0400 |
|---|---|---|
| committer | Antoni Boucher <bouanto@zoho.com> | 2024-09-24 12:38:01 -0400 |
| commit | 88445ee1c2ee96f97d72a102b60142943a817623 (patch) | |
| tree | 318cb62320f8026f1ef2baa6fa342231d4537d45 | |
| parent | cb36d78d7ba5ddd1b148db955121f43aad9f5db4 (diff) | |
| download | rust-88445ee1c2ee96f97d72a102b60142943a817623.tar.gz rust-88445ee1c2ee96f97d72a102b60142943a817623.zip | |
Add missing SIMD intrinsics

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | libgccjit.version | 2 |
| -rw-r--r-- | src/base.rs | 1 |
| -rw-r--r-- | src/builder.rs | 3 |
| -rw-r--r-- | src/intrinsic/llvm.rs | 98 |

4 files changed, 88 insertions, 16 deletions
```diff
diff --git a/libgccjit.version b/libgccjit.version
index e5f51a197a4..b9bbbd324c3 100644
--- a/libgccjit.version
+++ b/libgccjit.version
@@ -1 +1 @@
-a0cb76246d8d00ed9847d9874e5d5658049c332d
+e744a9459d33864067214741daf5c5bc2a7b88c6
diff --git a/src/base.rs b/src/base.rs
index d76011da980..b8f511b73a0 100644
--- a/src/base.rs
+++ b/src/base.rs
@@ -222,7 +222,6 @@ pub fn compile_codegen_unit(
         // ... and now that we have everything pre-defined, fill out those definitions.
         for &(mono_item, _) in &mono_items {
-            //println!("{:?}", mono_item);
             mono_item.define::<Builder<'_, '_, '_>>(&cx);
         }
diff --git a/src/builder.rs b/src/builder.rs
index f07c5a53f68..408b7bc3caa 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -275,7 +275,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                         assert!(
                             (!expected_ty.is_vector() || actual_ty.is_vector())
                                 && (expected_ty.is_vector() || !actual_ty.is_vector()),
-                            "{:?} ({}) -> {:?} ({}), index: {:?}[{}]",
+                            "{:?} (is vector: {}) -> {:?} (is vector: {}), Function: {:?}[{}]",
                             actual_ty,
                             actual_ty.is_vector(),
                             expected_ty,
@@ -285,7 +285,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                         );
                         // TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
                         // TODO: remove bitcast now that vector types can be compared?
-                        println!("Name: {}", func_name);
                         self.bitcast(actual_val, expected_ty)
                     }
                 } else {
diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs
index 098c7fbb485..cc6bed1fc9a 100644
--- a/src/intrinsic/llvm.rs
+++ b/src/intrinsic/llvm.rs
@@ -153,7 +153,11 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
             | "__builtin_ia32_psrav16hi_mask"
             | "__builtin_ia32_psrav8hi_mask"
             | "__builtin_ia32_permvarhi256_mask"
-            | "__builtin_ia32_permvarhi128_mask" => {
+            | "__builtin_ia32_permvarhi128_mask"
+            | "__builtin_ia32_maxph128_mask"
+            | "__builtin_ia32_maxph256_mask"
+            | "__builtin_ia32_minph128_mask"
+            | "__builtin_ia32_minph256_mask" => {
                 let mut new_args = args.to_vec();
                 let arg3_type = gcc_func.get_param_type(2);
                 let vector_type = arg3_type.dyncast_vector().expect("vector type");
@@ -194,7 +198,13 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
             | "__builtin_ia32_cvtqq2ps256_mask"
             | "__builtin_ia32_cvtuqq2pd128_mask"
             | "__builtin_ia32_cvtuqq2pd256_mask"
-            | "__builtin_ia32_cvtuqq2ps256_mask" => {
+            | "__builtin_ia32_cvtuqq2ps256_mask"
+            | "__builtin_ia32_vcvtw2ph128_mask"
+            | "__builtin_ia32_vcvtw2ph256_mask"
+            | "__builtin_ia32_vcvtuw2ph128_mask"
+            | "__builtin_ia32_vcvtuw2ph256_mask"
+            | "__builtin_ia32_vcvtdq2ph256_mask"
+            | "__builtin_ia32_vcvtudq2ph256_mask" => {
                 let mut new_args = args.to_vec();
                 // Remove last arg as it doesn't seem to be used in GCC and is always false.
                 new_args.pop();
@@ -296,7 +306,8 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
             "__builtin_ia32_vfmaddsubps512_mask"
             | "__builtin_ia32_vfmaddsubpd512_mask"
             | "__builtin_ia32_cmpsh_mask_round"
-            | "__builtin_ia32_vfmaddph512_mask" => {
+            | "__builtin_ia32_vfmaddph512_mask"
+            | "__builtin_ia32_vfmaddsubph512_mask" => {
                 let mut new_args = args.to_vec();
                 let last_arg = new_args.pop().expect("last arg");
                 let arg4_type = gcc_func.get_param_type(3);
@@ -319,9 +330,6 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
             | "__builtin_ia32_vpermi2varpd128_mask"
             | "__builtin_ia32_vpmadd52huq512_mask"
             | "__builtin_ia32_vpmadd52luq512_mask"
-            | "__builtin_ia32_vpmadd52huq256_mask"
-            | "__builtin_ia32_vpmadd52luq256_mask"
-            | "__builtin_ia32_vpmadd52huq128_mask"
             | "__builtin_ia32_vfmaddsubph128_mask"
             | "__builtin_ia32_vfmaddsubph256_mask" => {
                 let mut new_args = args.to_vec();
@@ -405,7 +413,14 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
             "__builtin_ia32_cvtqq2pd512_mask"
             | "__builtin_ia32_cvtqq2ps512_mask"
             | "__builtin_ia32_cvtuqq2pd512_mask"
-            | "__builtin_ia32_cvtuqq2ps512_mask" => {
+            | "__builtin_ia32_cvtuqq2ps512_mask"
+            | "__builtin_ia32_sqrtph512_mask_round"
+            | "__builtin_ia32_vcvtw2ph512_mask_round"
+            | "__builtin_ia32_vcvtuw2ph512_mask_round"
+            | "__builtin_ia32_vcvtdq2ph512_mask_round"
+            | "__builtin_ia32_vcvtudq2ph512_mask_round"
+            | "__builtin_ia32_vcvtqq2ph512_mask_round"
+            | "__builtin_ia32_vcvtuqq2ph512_mask_round" => {
                 let mut old_args = args.to_vec();
                 let mut new_args = vec![];
                 new_args.push(old_args.swap_remove(0));
@@ -425,7 +440,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
             "__builtin_ia32_addph512_mask_round"
             | "__builtin_ia32_subph512_mask_round"
             | "__builtin_ia32_mulph512_mask_round"
-            | "__builtin_ia32_divph512_mask_round" => {
+            | "__builtin_ia32_divph512_mask_round"
+            | "__builtin_ia32_maxph512_mask_round"
+            | "__builtin_ia32_minph512_mask_round" => {
                 let mut new_args = args.to_vec();
                 let last_arg = new_args.pop().expect("last arg");
@@ -460,7 +477,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
         }
     } else {
         match func_name {
-            "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => {
+            "__builtin_ia32_rndscaless_mask_round"
+            | "__builtin_ia32_rndscalesd_mask_round"
+            | "__builtin_ia32_reducesh_mask_round" => {
                 let new_args = args.to_vec();
                 let arg3_type = gcc_func.get_param_type(2);
                 let arg3 = builder.context.new_cast(None, new_args[4], arg3_type);
@@ -585,6 +604,12 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
                 new_args[2] = builder.context.new_cast(None, new_args[2], builder.double_type);
                 args = new_args.into();
             }
+            "__builtin_ia32_sqrtsh_mask_round" => {
+                // The first two arguments are inverted, so swap them.
+                let mut new_args = args.to_vec();
+                new_args.swap(0, 1);
+                args = new_args.into();
+            }
             _ => (),
         }
     }
@@ -1090,9 +1115,9 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
         "llvm.x86.avx512.dbpsadbw.128" => "__builtin_ia32_dbpsadbw128_mask",
         "llvm.x86.avx512.vpmadd52h.uq.512" => "__builtin_ia32_vpmadd52huq512_mask",
         "llvm.x86.avx512.vpmadd52l.uq.512" => "__builtin_ia32_vpmadd52luq512_mask",
-        "llvm.x86.avx512.vpmadd52h.uq.256" => "__builtin_ia32_vpmadd52huq256_mask",
-        "llvm.x86.avx512.vpmadd52l.uq.256" => "__builtin_ia32_vpmadd52luq256_mask",
-        "llvm.x86.avx512.vpmadd52h.uq.128" => "__builtin_ia32_vpmadd52huq128_mask",
+        "llvm.x86.avx512.vpmadd52h.uq.256" => "__builtin_ia32_vpmadd52huq256",
+        "llvm.x86.avx512.vpmadd52l.uq.256" => "__builtin_ia32_vpmadd52luq256",
+        "llvm.x86.avx512.vpmadd52h.uq.128" => "__builtin_ia32_vpmadd52huq128",
         "llvm.x86.avx512.vpdpwssd.512" => "__builtin_ia32_vpdpwssd_v16si",
         "llvm.x86.avx512.vpdpwssd.256" => "__builtin_ia32_vpdpwssd_v8si",
         "llvm.x86.avx512.vpdpwssd.128" => "__builtin_ia32_vpdpwssd_v4si",
@@ -1209,6 +1234,55 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
         "llvm.x86.avx512fp16.vfmadd.f16" => "__builtin_ia32_vfmaddsh3_mask",
         "llvm.x86.avx512fp16.vfmaddsub.ph.128" => "__builtin_ia32_vfmaddsubph128_mask",
         "llvm.x86.avx512fp16.vfmaddsub.ph.256" => "__builtin_ia32_vfmaddsubph256_mask",
+        "llvm.x86.avx512fp16.vfmaddsub.ph.512" => "__builtin_ia32_vfmaddsubph512_mask",
+        "llvm.x86.avx512fp16.sqrt.ph.512" => "__builtin_ia32_sqrtph512_mask_round",
+        "llvm.x86.avx512fp16.mask.sqrt.sh" => "__builtin_ia32_sqrtsh_mask_round",
+        "llvm.x86.avx512fp16.max.ph.128" => "__builtin_ia32_maxph128_mask",
+        "llvm.x86.avx512fp16.max.ph.256" => "__builtin_ia32_maxph256_mask",
+        "llvm.x86.avx512fp16.max.ph.512" => "__builtin_ia32_maxph512_mask_round",
+        "llvm.x86.avx512fp16.min.ph.128" => "__builtin_ia32_minph128_mask",
+        "llvm.x86.avx512fp16.min.ph.256" => "__builtin_ia32_minph256_mask",
+        "llvm.x86.avx512fp16.min.ph.512" => "__builtin_ia32_minph512_mask_round",
+        "llvm.x86.avx512fp16.mask.getexp.sh" => "__builtin_ia32_getexpsh_mask_round",
+        "llvm.x86.avx512fp16.mask.rndscale.ph.128" => "__builtin_ia32_rndscaleph128_mask",
+        "llvm.x86.avx512fp16.mask.rndscale.ph.256" => "__builtin_ia32_rndscaleph256_mask",
+        "llvm.x86.avx512fp16.mask.rndscale.ph.512" => "__builtin_ia32_rndscaleph512_mask_round",
+        "llvm.x86.avx512fp16.mask.scalef.ph.512" => "__builtin_ia32_scalefph512_mask_round",
+        "llvm.x86.avx512fp16.mask.reduce.ph.512" => "__builtin_ia32_reduceph512_mask_round",
+        "llvm.x86.avx512fp16.mask.reduce.sh" => "__builtin_ia32_reducesh_mask_round",
+        "llvm.x86.avx512.sitofp.round.v8f16.v8i16" => "__builtin_ia32_vcvtw2ph128_mask",
+        "llvm.x86.avx512.sitofp.round.v16f16.v16i16" => "__builtin_ia32_vcvtw2ph256_mask",
+        "llvm.x86.avx512.sitofp.round.v32f16.v32i16" => "__builtin_ia32_vcvtw2ph512_mask_round",
+        "llvm.x86.avx512.uitofp.round.v8f16.v8u16" => "__builtin_ia32_vcvtuw2ph128_mask",
+        "llvm.x86.avx512.uitofp.round.v16f16.v16u16" => "__builtin_ia32_vcvtuw2ph256_mask",
+        "llvm.x86.avx512.uitofp.round.v32f16.v32u16" => "__builtin_ia32_vcvtuw2ph512_mask_round",
+        "llvm.x86.avx512.sitofp.round.v8f16.v8i32" => "__builtin_ia32_vcvtdq2ph256_mask",
+        "llvm.x86.avx512.sitofp.round.v16f16.v16i32" => "__builtin_ia32_vcvtdq2ph512_mask_round",
+        "llvm.x86.avx512fp16.vcvtsi2sh" => "__builtin_ia32_vcvtsi2sh32_round",
+        "llvm.x86.avx512.uitofp.round.v8f16.v8u32" => "__builtin_ia32_vcvtudq2ph256_mask",
+        "llvm.x86.avx512.uitofp.round.v16f16.v16u32" => "__builtin_ia32_vcvtudq2ph512_mask_round",
+        "llvm.x86.avx512fp16.vcvtusi2sh" => "__builtin_ia32_vcvtusi2sh32_round",
+        "llvm.x86.avx512.sitofp.round.v8f16.v8i64" => "__builtin_ia32_vcvtqq2ph512_mask_round",
+        "llvm.x86.avx512.uitofp.round.v8f16.v8u64" => "__builtin_ia32_vcvtuqq2ph512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvtps2phx.512" => "__builtin_ia32_vcvtps2phx512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvtpd2ph.512" => "__builtin_ia32_vcvtpd2ph512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvtph2uw.512" => "__builtin_ia32_vcvtph2uw512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvttph2w.512" => "__builtin_ia32_vcvttph2w512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvttph2uw.512" => "__builtin_ia32_vcvttph2uw512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvtph2dq.512" => "__builtin_ia32_vcvtph2dq512_mask_round",
+        "llvm.x86.avx512fp16.vcvtsh2si32" => "__builtin_ia32_vcvtsh2si32_round",
+        "llvm.x86.avx512fp16.mask.vcvtph2udq.512" => "__builtin_ia32_vcvtph2udq512_mask_round",
+        "llvm.x86.avx512fp16.vcvtsh2usi32" => "__builtin_ia32_vcvtsh2usi32_round",
+        "llvm.x86.avx512fp16.mask.vcvttph2dq.512" => "__builtin_ia32_vcvttph2dq512_mask_round",
+        "llvm.x86.avx512fp16.vcvttsh2si32" => "__builtin_ia32_vcvttsh2si32_round",
+        "llvm.x86.avx512fp16.mask.vcvttph2udq.512" => "__builtin_ia32_vcvttph2udq512_mask_round",
+        "llvm.x86.avx512fp16.vcvttsh2usi32" => "__builtin_ia32_vcvttsh2usi32_round",
+        "llvm.x86.avx512fp16.mask.vcvtph2qq.512" => "__builtin_ia32_vcvtph2qq512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvtph2uqq.512" => "__builtin_ia32_vcvtph2uqq512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvttph2qq.512" => "__builtin_ia32_vcvttph2qq512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvttph2uqq.512" => "__builtin_ia32_vcvttph2uqq512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvtph2psx.512" => "__builtin_ia32_vcvtph2psx512_mask_round",
+        "llvm.x86.avx512fp16.mask.vcvtph2pd.512" => "__builtin_ia32_vcvtph2pd512_mask_round",
         // TODO: support the tile builtins:
         "llvm.x86.ldtilecfg" => "__builtin_trap",
```
