diff options
| author | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2025-02-07 11:01:59 +0000 |
|---|---|---|
| committer | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2025-02-07 11:01:59 +0000 |
| commit | b004312ee4c8418e5a42cc25b971fa5fc5ac88b7 (patch) | |
| tree | 72f09546187fc628bbcde6ee294eb19a85b514da | |
| parent | ed91b731796942a486a29b568e373a4276161863 (diff) | |
| download | rust-b004312ee4c8418e5a42cc25b971fa5fc5ac88b7.tar.gz rust-b004312ee4c8418e5a42cc25b971fa5fc5ac88b7.zip | |
Implement arm64 vaddlvq_u8 and vld1q_u8_x4 vendor intrinsics
This is required for using the bytecount crate on arm64.
| -rw-r--r-- | src/intrinsics/llvm_aarch64.rs | 24 |
1 file changed, 24 insertions, 0 deletions
diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs index 39f6763d9f2..4c59c81296b 100644 --- a/src/intrinsics/llvm_aarch64.rs +++ b/src/intrinsics/llvm_aarch64.rs @@ -17,6 +17,14 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( fx.bcx.ins().fence(); } + "llvm.aarch64.neon.ld1x4.v16i8.p0i8" => { + intrinsic_args!(fx, args => (ptr); intrinsic); + + let ptr = ptr.load_scalar(fx); + let val = CPlace::for_ptr(Pointer::new(ptr), ret.layout()).to_cvalue(fx); + ret.write_cvalue(fx, val); + } + _ if intrinsic.starts_with("llvm.aarch64.neon.abs.v") => { intrinsic_args!(fx, args => (a); intrinsic); @@ -115,6 +123,22 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( ); } + "llvm.aarch64.neon.uaddlv.i32.v16i8" => { + intrinsic_args!(fx, args => (v); intrinsic); + + let mut res_val = fx.bcx.ins().iconst(types::I16, 0); + for lane_idx in 0..16 { + let lane = v.value_lane(fx, lane_idx).load_scalar(fx); + let lane = fx.bcx.ins().uextend(types::I16, lane); + res_val = fx.bcx.ins().iadd(res_val, lane); + } + let res = CValue::by_val( + fx.bcx.ins().uextend(types::I32, res_val), + fx.layout_of(fx.tcx.types.u32), + ); + ret.write_cvalue(fx, res); + } + _ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => { intrinsic_args!(fx, args => (v); intrinsic); |
