diff options
| field | value | date |
|---|---|---|
| author | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2023-11-07 14:38:10 +0100 |
| committer | GitHub <noreply@github.com> | 2023-11-07 14:38:10 +0100 |
| commit | 9f426cef383f36bf0725f40182e352f02cd3fbc9 (patch) | |
| tree | 2699ca3ecb41ad62f7d212b12b723d0e03c11004 /src | |
| parent | ef3703694ff6b19d38aff83477bb2ad38f5a58d8 (diff) | |
| parent | 209476e33acbeb14213c2edbb6e877dd251d4943 (diff) | |
| download | rust-9f426cef383f36bf0725f40182e352f02cd3fbc9.tar.gz rust-9f426cef383f36bf0725f40182e352f02cd3fbc9.zip | |
Merge pull request #1416 from afonso360/aarch64-intrinsics-1
Implement AArch64 intrinsics necessary for simd-json
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/intrinsics/llvm_aarch64.rs | 111 |
| -rw-r--r-- | src/intrinsics/mod.rs | 30 |
2 files changed, 120 insertions, 21 deletions
diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs index 0c211a06dc4..ee098be1fce 100644 --- a/src/intrinsics/llvm_aarch64.rs +++ b/src/intrinsics/llvm_aarch64.rs @@ -44,7 +44,9 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( }); } - _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") => { + _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") + || intrinsic.starts_with("llvm.aarch64.neon.uqadd.v") => + { intrinsic_args!(fx, args => (x, y); intrinsic); simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| { @@ -52,7 +54,9 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( }); } - _ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v") => { + _ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v") + || intrinsic.starts_with("llvm.aarch64.neon.uqsub.v") => + { intrinsic_args!(fx, args => (x, y); intrinsic); simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| { @@ -156,6 +160,90 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( }); } + _ if intrinsic.starts_with("llvm.aarch64.neon.umaxp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().umax(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.smaxp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().smax(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.uminp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().umin(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.sminp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( 
+ fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().smin(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.fminp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmin(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.fmaxp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmax(x_lane, y_lane), + ); + } + + _ if intrinsic.starts_with("llvm.aarch64.neon.addp.v") => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + simd_horizontal_pair_for_each_lane( + fx, + x, + y, + ret, + &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().iadd(x_lane, y_lane), + ); + } + // FIXME generalize vector types "llvm.aarch64.neon.tbl1.v16i8" => { intrinsic_args!(fx, args => (t, idx); intrinsic); @@ -172,25 +260,6 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( } } - // FIXME generalize vector types - "llvm.aarch64.neon.umaxp.v16i8" => { - intrinsic_args!(fx, args => (a, b); intrinsic); - - // FIXME add helper for horizontal pairwise operations - for i in 0..8 { - let lane1 = a.value_lane(fx, i * 2).load_scalar(fx); - let lane2 = a.value_lane(fx, i * 2 + 1).load_scalar(fx); - let res = fx.bcx.ins().umax(lane1, lane2); - ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted()); - } - for i in 0..8 { - let lane1 = b.value_lane(fx, i * 2).load_scalar(fx); - let lane2 = b.value_lane(fx, i * 2 + 1).load_scalar(fx); - let res = fx.bcx.ins().umax(lane1, lane2); - ret.place_lane(fx, 8 + i).to_ptr().store(fx, res, MemFlags::trusted()); - } - } - /* _ if intrinsic.starts_with("llvm.aarch64.neon.sshl.v") || intrinsic.starts_with("llvm.aarch64.neon.sqshl.v") diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 
58b31a53432..bfeeb117ff5 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -132,6 +132,36 @@ fn simd_pair_for_each_lane<'tcx>( } } +fn simd_horizontal_pair_for_each_lane<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + x: CValue<'tcx>, + y: CValue<'tcx>, + ret: CPlace<'tcx>, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value) -> Value, +) { + assert_eq!(x.layout(), y.layout()); + let layout = x.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + assert_eq!(lane_count, ret_lane_count); + + for lane_idx in 0..lane_count { + let src = if lane_idx < (lane_count / 2) { x } else { y }; + let src_idx = lane_idx % (lane_count / 2); + + let lhs_lane = src.value_lane(fx, src_idx * 2).load_scalar(fx); + let rhs_lane = src.value_lane(fx, src_idx * 2 + 1).load_scalar(fx); + + let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, lhs_lane, rhs_lane); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); + } +} + fn simd_trio_for_each_lane<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, x: CValue<'tcx>, |
