| Field | Value | Date |
|---|---|---|
| author | Sparrow Li <liyuan179@huawei.com> | 2021-04-29 05:59:41 +0800 |
| committer | GitHub <noreply@github.com> | 2021-04-28 22:59:41 +0100 |
| commit | 07f1d0cae30fecd4839d39c8b529178b7273c6ea (patch) | |
| tree | 4b9bcfa20834d8ed53cf1da9e7229e7322124a09 /library/stdarch/crates | |
| parent | 54a2d8b82a84c9592726b1181b528e344b32915f (diff) | |
Add vmla_n, vmla_lane, vmls_n, vmls_lane neon instructions (#1145)
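The new intrinsics follow the usual NEON naming scheme: the `_n` forms multiply by a scalar broadcast to every lane, and the `_lane`/`_laneq` forms multiply by a single lane selected from a 64-bit or 128-bit vector, for both the multiply-accumulate/subtract family (`vmla*`, `vmls*`) and the widening family (`vmlal*`, `vmlsl*`, including the AArch64-only `_high` variants). A minimal usage sketch, assuming an AArch64 target with NEON; the intrinsic names and signatures come from the diff below, while the values are made up for illustration:

```rust
// Illustrative sketch only; intrinsic names are taken from the diff below,
// the input values are arbitrary.
#[cfg(target_arch = "aarch64")]
unsafe fn demo() {
    use core::arch::aarch64::*;

    let a = vdup_n_s16(10); // accumulator:   [10, 10, 10, 10]
    let b = vdup_n_s16(3);  // multiplicand:  [3, 3, 3, 3]

    // vmla_n_s16: a + b * 2  ->  16 in every lane
    let r1 = vmla_n_s16(a, b, 2);
    assert_eq!(vget_lane_s16::<0>(r1), 16);

    // vmls_lane_s16::<1>: a - b * c[1]  ->  10 - 3 * 2 = 4 in every lane
    let c = vld1_s16([9, 2, 9, 9].as_ptr());
    let r2 = vmls_lane_s16::<1>(a, b, c);
    assert_eq!(vget_lane_s16::<0>(r2), 4);
}
```

Each of these is expected to lower to a single instruction (`mla`, `mls`, `smlal`, `umlsl`, ...), which is what the `assert_instr` attributes in the generated code below check.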
Diffstat (limited to 'library/stdarch/crates')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs | 464 |
| -rw-r--r-- | library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs | 1798 |
| -rw-r--r-- | library/stdarch/crates/core_arch/src/lib.rs | 3 |
| -rw-r--r-- | library/stdarch/crates/stdarch-gen/neon.spec | 262 |
| -rw-r--r-- | library/stdarch/crates/stdarch-gen/src/main.rs | 5 |
| -rw-r--r-- | library/stdarch/crates/stdarch-test/src/lib.rs | 2 |
6 files changed, 2528 insertions, 6 deletions
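As the generated code below shows, each new intrinsic is a thin wrapper over an existing one: the `_n` variants broadcast the scalar with `vdup(q)_n_*` and call the vector form, while the `_lane`/`_laneq` variants broadcast the selected lane with `simd_shuffle*` after bounding `LANE` at compile time via `static_assert_imm*!`. A hedged sketch of the user-visible equivalence this implies for one of the AArch64 `_high` intrinsics (illustrative values only):

```rust
// Sketch of the equivalence implied by the wrappers below; values are illustrative.
#[cfg(target_arch = "aarch64")]
unsafe fn equivalence() {
    use core::arch::aarch64::*;

    let acc = vdupq_n_s32(100); // i32 accumulator lanes
    let b = vdupq_n_s16(7);     // vmlal_high_* widens the high 4 lanes of b

    // vmlal_high_n_s16(acc, b, 5) is defined as vmlal_high_s16(acc, b, vdupq_n_s16(5)):
    // acc + (high half of b, widened) * 5 = 100 + 7 * 5 = 135 in every lane.
    let via_n = vmlal_high_n_s16(acc, b, 5);
    let via_vec = vmlal_high_s16(acc, b, vdupq_n_s16(5));
    assert_eq!(vgetq_lane_s32::<0>(via_n), 135);
    assert_eq!(vgetq_lane_s32::<0>(via_vec), 135);
}
```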
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index b1b60f12ee9..63fa745c5ae 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -2950,6 +2950,118 @@ pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uin vmlal_u32(a, b, c) } +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlal2))] +pub unsafe fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { + vmlal_high_s16(a, b, vdupq_n_s16(c)) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlal2))] +pub unsafe fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { + vmlal_high_s32(a, b, vdupq_n_s32(c)) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlal2))] +pub unsafe fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { + vmlal_high_u16(a, b, vdupq_n_u16(c)) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlal2))] +pub unsafe fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { + vmlal_high_u32(a, b, vdupq_n_u32(c)) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmlal_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { + static_assert_imm3!(LANE); + vmlal_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(LANE); + vmlal_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { + static_assert_imm2!(LANE); + vmlal_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + vmlal_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// 
Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { + static_assert_imm3!(LANE); + vmlal_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t { + static_assert_imm1!(LANE); + vmlal_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-add long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { + static_assert_imm2!(LANE); + vmlal_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + /// Floating-point multiply-subtract from accumulator #[inline] #[target_feature(enable = "neon")] @@ -3026,6 +3138,118 @@ pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uin vmlsl_u32(a, b, c) } +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlsl2))] +pub unsafe fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { + vmlsl_high_s16(a, b, vdupq_n_s16(c)) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlsl2))] +pub unsafe fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { + vmlsl_high_s32(a, b, vdupq_n_s32(c)) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlsl2))] +pub unsafe fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { + vmlsl_high_u16(a, b, vdupq_n_u16(c)) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlsl2))] +pub unsafe fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { + vmlsl_high_u32(a, b, vdupq_n_u32(c)) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmlsl_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { + static_assert_imm3!(LANE); + vmlsl_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn 
vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(LANE); + vmlsl_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { + static_assert_imm2!(LANE); + vmlsl_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + vmlsl_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { + static_assert_imm3!(LANE); + vmlsl_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t { + static_assert_imm1!(LANE); + vmlsl_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { + static_assert_imm2!(LANE); + vmlsl_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + /// Extract narrow #[inline] #[target_feature(enable = "neon")] @@ -9751,6 +9975,126 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_n_s16() { + let a: i32x4 = i32x4::new(8, 7, 6, 5); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16 = 2; + let e: i32x4 = i32x4::new(8, 9, 10, 11); + let r: i32x4 = transmute(vmlal_high_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_n_s32() { + let a: i64x2 = i64x2::new(8, 7); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32 = 2; + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vmlal_high_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_n_u16() { + let a: u32x4 = u32x4::new(8, 7, 6, 5); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16 = 2; + let e: u32x4 = u32x4::new(8, 9, 10, 11); + let r: u32x4 = transmute(vmlal_high_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_n_u32() { + let a: u64x2 = u64x2::new(8, 7); 
+ let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32 = 2; + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vmlal_high_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_lane_s16() { + let a: i32x4 = i32x4::new(8, 7, 6, 5); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16x4 = i16x4::new(0, 2, 0, 0); + let e: i32x4 = i32x4::new(8, 9, 10, 11); + let r: i32x4 = transmute(vmlal_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_laneq_s16() { + let a: i32x4 = i32x4::new(8, 7, 6, 5); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(8, 9, 10, 11); + let r: i32x4 = transmute(vmlal_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_lane_s32() { + let a: i64x2 = i64x2::new(8, 7); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32x2 = i32x2::new(0, 2); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vmlal_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_laneq_s32() { + let a: i64x2 = i64x2::new(8, 7); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32x4 = i32x4::new(0, 2, 0, 0); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vmlal_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_lane_u16() { + let a: u32x4 = u32x4::new(8, 7, 6, 5); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16x4 = u16x4::new(0, 2, 0, 0); + let e: u32x4 = u32x4::new(8, 9, 10, 11); + let r: u32x4 = transmute(vmlal_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_laneq_u16() { + let a: u32x4 = u32x4::new(8, 7, 6, 5); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(8, 9, 10, 11); + let r: u32x4 = transmute(vmlal_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_lane_u32() { + let a: u64x2 = u64x2::new(8, 7); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32x2 = u32x2::new(0, 2); + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vmlal_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_high_laneq_u32() { + let a: u64x2 = u64x2::new(8, 7); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32x4 = u32x4::new(0, 2, 0, 0); + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vmlal_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vmls_f64() { let a: f64 = 6.; let b: f64 = 2.; @@ -9831,6 +10175,126 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_n_s16() { + let a: i32x4 = i32x4::new(14, 15, 16, 17); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16 = 2; + let e: i32x4 = i32x4::new(14, 13, 12, 11); + let r: i32x4 = transmute(vmlsl_high_n_s16(transmute(a), transmute(b), 
transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_n_s32() { + let a: i64x2 = i64x2::new(14, 15); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32 = 2; + let e: i64x2 = i64x2::new(14, 13); + let r: i64x2 = transmute(vmlsl_high_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_n_u16() { + let a: u32x4 = u32x4::new(14, 15, 16, 17); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16 = 2; + let e: u32x4 = u32x4::new(14, 13, 12, 11); + let r: u32x4 = transmute(vmlsl_high_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_n_u32() { + let a: u64x2 = u64x2::new(14, 15); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32 = 2; + let e: u64x2 = u64x2::new(14, 13); + let r: u64x2 = transmute(vmlsl_high_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_lane_s16() { + let a: i32x4 = i32x4::new(14, 15, 16, 17); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16x4 = i16x4::new(0, 2, 0, 0); + let e: i32x4 = i32x4::new(14, 13, 12, 11); + let r: i32x4 = transmute(vmlsl_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_laneq_s16() { + let a: i32x4 = i32x4::new(14, 15, 16, 17); + let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(14, 13, 12, 11); + let r: i32x4 = transmute(vmlsl_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_lane_s32() { + let a: i64x2 = i64x2::new(14, 15); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32x2 = i32x2::new(0, 2); + let e: i64x2 = i64x2::new(14, 13); + let r: i64x2 = transmute(vmlsl_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_laneq_s32() { + let a: i64x2 = i64x2::new(14, 15); + let b: i32x4 = i32x4::new(3, 3, 0, 1); + let c: i32x4 = i32x4::new(0, 2, 0, 0); + let e: i64x2 = i64x2::new(14, 13); + let r: i64x2 = transmute(vmlsl_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_lane_u16() { + let a: u32x4 = u32x4::new(14, 15, 16, 17); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16x4 = u16x4::new(0, 2, 0, 0); + let e: u32x4 = u32x4::new(14, 13, 12, 11); + let r: u32x4 = transmute(vmlsl_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_laneq_u16() { + let a: u32x4 = u32x4::new(14, 15, 16, 17); + let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); + let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(14, 13, 12, 11); + let r: u32x4 = transmute(vmlsl_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_lane_u32() { + let a: u64x2 = u64x2::new(14, 15); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32x2 = u32x2::new(0, 2); + let e: u64x2 = u64x2::new(14, 13); + let r: u64x2 = transmute(vmlsl_high_lane_u32::<1>(transmute(a), 
transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_high_laneq_u32() { + let a: u64x2 = u64x2::new(14, 15); + let b: u32x4 = u32x4::new(3, 3, 0, 1); + let c: u32x4 = u32x4::new(0, 2, 0, 0); + let e: u64x2 = u64x2::new(14, 13); + let r: u64x2 = transmute(vmlsl_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vmovn_high_s16() { let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5); let b: i16x8 = i16x8::new(2, 3, 4, 5, 12, 13, 14, 15); diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index 0e40deaac7e..7f7c8ffddc5 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -3122,6 +3122,346 @@ pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float simd_add(a, simd_mul(b, c)) } +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmla_s16(a, b, vdup_n_s16(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlaq_s16(a, b, vdupq_n_s16(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmla_s32(a, b, vdup_n_s32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlaq_s32(a, b, vdupq_n_s32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmla_u16(a, b, vdup_n_u16(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlaq_u16(a, b, vdupq_n_u16(c)) +} + +/// Vector multiply accumulate with 
scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmla_u32(a, b, vdup_n_u32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlaq_u32(a, b, vdupq_n_u32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmla_f32(a, b, vdup_n_f32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlaq_f32(a, b, vdupq_n_f32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + static_assert_imm2!(LANE); + vmla_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { + static_assert_imm3!(LANE); + vmla_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { + static_assert_imm2!(LANE); + vmlaq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + static_assert_imm3!(LANE); + vmlaq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + static_assert_imm1!(LANE); + vmla_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { + static_assert_imm2!(LANE); + vmla_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { + static_assert_imm1!(LANE); + vmlaq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmlaq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + static_assert_imm2!(LANE); + vmla_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { + static_assert_imm3!(LANE); + vmla_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { + static_assert_imm2!(LANE); + vmlaq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + static_assert_imm3!(LANE); + vmlaq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + static_assert_imm1!(LANE); + vmla_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { + static_assert_imm2!(LANE); + vmla_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { + static_assert_imm1!(LANE); + vmlaq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + vmlaq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + static_assert_imm1!(LANE); + vmla_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmla_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { + static_assert_imm2!(LANE); + vmla_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { + static_assert_imm1!(LANE); + vmlaq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlaq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + static_assert_imm2!(LANE); + vmlaq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + /// Signed multiply-add long #[inline] #[target_feature(enable = "neon")] @@ -3182,6 +3522,142 @@ pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2 simd_add(a, vmull_u32(b, c)) } +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))] +pub unsafe fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlal_s16(a, b, vdup_n_s16(c)) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vmlal.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))] +pub unsafe fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlal_s32(a, b, vdup_n_s32(c)) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))] +pub unsafe fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlal_u16(a, b, vdup_n_u16(c)) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))] +pub unsafe fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlal_u32(a, b, vdup_n_u32(c)) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmlal_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_imm3!(LANE); + vmlal_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(LANE); + vmlal_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + static_assert_imm2!(LANE); + vmlal_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + vmlal_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { + static_assert_imm3!(LANE); + vmlal_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + static_assert_imm1!(LANE); + vmlal_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector widening multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { + static_assert_imm2!(LANE); + vmlal_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + /// Multiply-subtract from accumulator #[inline] #[target_feature(enable = "neon")] @@ -3322,6 +3798,346 @@ pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float simd_sub(a, simd_mul(b, c)) } +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmls_s16(a, b, vdup_n_s16(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlsq_s16(a, b, vdupq_n_s16(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_n_s32(a: int32x2_t, b: 
int32x2_t, c: i32) -> int32x2_t { + vmls_s32(a, b, vdup_n_s32(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlsq_s32(a, b, vdupq_n_s32(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmls_u16(a, b, vdup_n_u16(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlsq_u16(a, b, vdupq_n_u16(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmls_u32(a, b, vdup_n_u32(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlsq_u32(a, b, vdupq_n_u32(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmls_f32(a, b, vdup_n_f32(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlsq_f32(a, b, vdupq_n_f32(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + static_assert_imm2!(LANE); + vmls_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector 
multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { + static_assert_imm3!(LANE); + vmls_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { + static_assert_imm2!(LANE); + vmlsq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + static_assert_imm3!(LANE); + vmlsq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + static_assert_imm1!(LANE); + vmls_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { + static_assert_imm2!(LANE); + vmls_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { + static_assert_imm1!(LANE); + vmlsq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmlsq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + static_assert_imm2!(LANE); + vmls_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { + static_assert_imm3!(LANE); + vmls_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { + static_assert_imm2!(LANE); + vmlsq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + static_assert_imm3!(LANE); + vmlsq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + static_assert_imm1!(LANE); + vmls_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply 
subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { + static_assert_imm2!(LANE); + vmls_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { + static_assert_imm1!(LANE); + vmlsq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + vmlsq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + static_assert_imm1!(LANE); + vmls_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { + static_assert_imm2!(LANE); + vmls_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { + static_assert_imm1!(LANE); + vmlsq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable 
= "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + static_assert_imm2!(LANE); + vmlsq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + /// Signed multiply-subtract long #[inline] #[target_feature(enable = "neon")] @@ -3352,7 +4168,7 @@ pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { simd_sub(a, vmull_s32(b, c)) } -/// Signed multiply-subtract long +/// Unsigned multiply-subtract long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -3362,7 +4178,7 @@ pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t simd_sub(a, vmull_u8(b, c)) } -/// Signed multiply-subtract long +/// Unsigned multiply-subtract long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -3372,7 +4188,7 @@ pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4 simd_sub(a, vmull_u16(b, c)) } -/// Signed multiply-subtract long +/// Unsigned multiply-subtract long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -3382,6 +4198,142 @@ pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2 simd_sub(a, vmull_u32(b, c)) } +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))] +pub unsafe fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlsl_s16(a, b, vdup_n_s16(c)) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))] +pub unsafe fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlsl_s32(a, b, vdup_n_s32(c)) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))] +pub unsafe fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlsl_u16(a, b, vdup_n_u16(c)) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))] +pub unsafe fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlsl_u32(a, b, vdup_n_u32(c)) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] 
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmlsl_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_imm3!(LANE); + vmlsl_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(LANE); + vmlsl_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + static_assert_imm2!(LANE); + vmlsl_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + vmlsl_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { + static_assert_imm3!(LANE); + vmlsl_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(umlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + static_assert_imm1!(LANE); + vmlsl_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsl_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { + static_assert_imm2!(LANE); + vmlsl_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) +} + /// Negate #[inline] #[target_feature(enable = "neon")] @@ -14222,6 +15174,306 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vmla_n_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16 = 3; + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16 = 3; + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_n_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32 = 3; + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32 = 3; + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_n_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16 = 3; + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16 = 3; + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_n_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32 = 3; + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vmla_n_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32 = 3.; + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_n_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_f32() { + let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32 = 3.; + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_n_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_lane_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_laneq_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_lane_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_laneq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_lane_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_laneq_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_lane_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_laneq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_lane_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: 
u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_laneq_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_lane_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_laneq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_lane_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_laneq_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_lane_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_laneq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_lane_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x2 = f32x2::new(0., 3.); + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_laneq_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x4 = f32x4::new(0., 3., 0., 0.); + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_lane_f32() { + let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 
2., 2.); + let c: f32x2 = f32x2::new(0., 3.); + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_laneq_f32() { + let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x4 = f32x4::new(0., 3., 0., 0.); + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vmlal_s8() { let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); @@ -14282,6 +15534,126 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vmlal_n_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16 = 3; + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_n_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32 = 3; + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_n_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16 = 3; + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_n_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_lane_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_laneq_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_lane_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_laneq_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_lane_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 
0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_laneq_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_lane_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_laneq_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vmls_s8() { let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); @@ -14422,6 +15794,306 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vmls_n_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16 = 3; + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_n_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16 = 3; + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_n_s32() { + let a: i32x2 = i32x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32 = 3; + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_n_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32 = 3; + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_n_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16 = 3; + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_n_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16 = 3; + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_n_u32() { + let a: u32x2 = u32x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = 
transmute(vmls_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_n_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32 = 3; + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_n_f32() { + let a: f32x2 = f32x2::new(6., 7.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32 = 3.; + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_n_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_n_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32 = 3.; + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_n_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_lane_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_laneq_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_lane_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_laneq_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_lane_s32() { + let a: i32x2 = i32x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_laneq_s32() { + let a: i32x2 = i32x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_lane_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable 
= "neon")] + unsafe fn test_vmlsq_laneq_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_lane_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_laneq_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_lane_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_laneq_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_lane_u32() { + let a: u32x2 = u32x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_laneq_u32() { + let a: u32x2 = u32x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_lane_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_laneq_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_lane_f32() { + let a: f32x2 = f32x2::new(6., 7.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x2 = f32x2::new(0., 3.); + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vmls_laneq_f32() { + let a: f32x2 = f32x2::new(6., 7.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x4 = f32x4::new(0., 3., 0., 0.); + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_lane_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x2 = f32x2::new(0., 3.); + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_laneq_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x4 = f32x4::new(0., 3., 0., 0.); + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vmlsl_s8() { let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); @@ -14482,6 +16154,126 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_n_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16 = 3; + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsl_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_n_s32() { + let a: i64x2 = i64x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32 = 3; + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_n_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16 = 3; + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_n_u32() { + let a: u64x2 = u64x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_lane_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsl_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_laneq_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsl_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_lane_s32() { + let a: i64x2 = i64x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_laneq_s32() { + 
let a: i64x2 = i64x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_lane_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_laneq_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_lane_u32() { + let a: u64x2 = u64x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_laneq_u32() { + let a: u64x2 = u64x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vneg_s8() { let a: i8x8 = i8x8::new(0, 1, -1, 2, -2, 3, -3, 4); let e: i8x8 = i8x8::new(0, -1, 1, -2, 2, -3, 3, -4); diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index 5e1012fa1e1..b0eea598610 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -37,7 +37,8 @@ external_doc, allow_internal_unstable, decl_macro, - extended_key_value_attributes + extended_key_value_attributes, + bench_black_box )] #![cfg_attr(test, feature(test, abi_vectorcall))] #![cfg_attr(all(test, target_arch = "wasm32"), feature(wasm_simd))] diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index 0de37ad2192..f0b7448cc30 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -1222,6 +1222,68 @@ generate float64x*_t arm = vmla. generate float*_t +/// Vector multiply accumulate with scalar +name = vmla +n-suffix +multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c} +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 3 +validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 + +aarch64 = mla +arm = vmla. +generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t +generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t + +/// Vector multiply accumulate with scalar +name = vmla +n-suffix +multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c} +a = 0., 1., 2., 3. +b = 2., 2., 2., 2. +c = 3. +validate 6., 7., 8., 9. + +aarch64 = fmul +arm = vmla. 
+generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t + +/// Vector multiply accumulate with scalar +name = vmla +in2-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +n = 1 +validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 + +aarch64 = mla +arm = vmla. +generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t +generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t +generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t +generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t + +/// Vector multiply accumulate with scalar +name = vmla +in2-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +a = 0., 1., 2., 3. +b = 2., 2., 2., 2. +c = 0., 3., 0., 0. +n = 1 +validate 6., 7., 8., 9. + +aarch64 = fmul +arm = vmla. +generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t + /// Signed multiply-add long name = vmlal multi_fn = simd_add, a, {vmull-self-noext, b, c} @@ -1246,6 +1308,41 @@ arm = vmlal.s aarch64 = umlal generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t +/// Vector widening multiply accumulate with scalar +name = vmlal +n-suffix +multi_fn = vmlal-self-noext, a, b, {vdup-nself-noext, c} +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 3 +validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 + +arm = vmlal.s +aarch64 = smlal +generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t +aarch64 = umlal +generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t + +/// Vector widening multiply accumulate with scalar +name = vmlal_lane +in2-suffix +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vmlal-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +n = 1 +validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 + +arm = vmlal.s +aarch64 = smlal +generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t +generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t +aarch64 = umlal +generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t +generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t + /// Signed multiply-add long name = vmlal_high no-q @@ -1276,6 +1373,39 @@ validate 8, 9, 10, 11, 12, 13, 14, 15 aarch64 = umlal2 generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, 
uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t +/// Multiply-add long +name = vmlal_high_n +no-q +multi_fn = vmlal_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c} +a = 8, 7, 6, 5, 4, 3, 2, 1 +b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7 +c = 2 +validate 8, 9, 10, 11, 12, 13, 14, 15 + +aarch64 = smlal2 +generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t +aarch64 = umlal2 +generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t + +/// Multiply-add long +name = vmlal_high_lane +in2-suffix +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +a = 8, 7, 6, 5, 4, 3, 2, 1 +b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7 +c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +n = 1 +validate 8, 9, 10, 11, 12, 13, 14, 15 + +aarch64 = smlal2 +generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t +generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t +aarch64 = umlal2 +generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t +generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t + /// Multiply-subtract from accumulator name = vmls multi_fn = simd_sub, a, {simd_mul, b, c} @@ -1302,6 +1432,68 @@ generate float64x*_t arm = vmls. generate float*_t +/// Vector multiply subtract with scalar +name = vmls +n-suffix +multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c} +a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 3 +validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + +aarch64 = mls +arm = vmls. +generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t +generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t + +/// Vector multiply subtract with scalar +name = vmls +n-suffix +multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c} +a = 6., 7., 8., 9. +b = 2., 2., 2., 2. +c = 3. +validate 0., 1., 2., 3. + +aarch64 = fmul +arm = vmls. +generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t + +/// Vector multiply subtract with scalar +name = vmls +in2-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +n = 1 +validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + +aarch64 = mls +arm = vmls. 
+generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t +generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t +generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t +generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t + +/// Vector multiply subtract with scalar +name = vmls +in2-lane-suffixes +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +a = 6., 7., 8., 9. +b = 2., 2., 2., 2. +c = 0., 3., 0., 0. +n = 1 +validate 0., 1., 2., 3. + +aarch64 = fmul +arm = vmls. +generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t + /// Signed multiply-subtract long name = vmlsl multi_fn = simd_sub, a, {vmull-self-noext, b, c} @@ -1314,7 +1506,7 @@ arm = vmlsl.s aarch64 = smlsl generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t -/// Signed multiply-subtract long +/// Unsigned multiply-subtract long name = vmlsl multi_fn = simd_sub, a, {vmull-self-noext, b, c} a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 @@ -1326,6 +1518,41 @@ arm = vmlsl.s aarch64 = umlsl generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t +/// Vector widening multiply subtract with scalar +name = vmlsl +n-suffix +multi_fn = vmlsl-self-noext, a, b, {vdup-nself-noext, c} +a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 3 +validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + +arm = vmlsl.s +aarch64 = smlsl +generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t +aarch64 = umlsl +generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t + +/// Vector widening multiply subtract with scalar +name = vmlsl_lane +in2-suffix +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vmlsl-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +n = 1 +validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + +arm = vmlsl.s +aarch64 = smlsl +generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t +generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t +aarch64 = umlsl +generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t +generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t + /// Signed multiply-subtract long name = vmlsl_high no-q @@ -1356,6 +1583,39 @@ validate 14, 13, 12, 11, 10, 9, 8, 7 aarch64 = umlsl2 generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t +/// Multiply-subtract long +name = vmlsl_high_n +no-q +multi_fn = vmlsl_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c} +a = 14, 15, 16, 17, 18, 19, 20, 21 +b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 
3, 4, 5, 6, 7 +c = 2 +validate 14, 13, 12, 11, 10, 9, 8, 7 + +aarch64 = smlsl2 +generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t +aarch64 = umlsl2 +generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t + +/// Multiply-subtract long +name = vmlsl_high_lane +in2-suffix +constn = LANE +multi_fn = static_assert_imm-in2_exp_len-LANE +multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +a = 14, 15, 16, 17, 18, 19, 20, 21 +b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7 +c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +n = 1 +validate 14, 13, 12, 11, 10, 9, 8, 7 + +aarch64 = smlsl2 +generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t +generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t +aarch64 = umlsl2 +generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t +generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t + /// Extract narrow name = vmovn_high no-q diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs index f659bab99bd..5a905d92fed 100644 --- a/library/stdarch/crates/stdarch-gen/src/main.rs +++ b/library/stdarch/crates/stdarch-gen/src/main.rs @@ -349,6 +349,7 @@ enum Suffix { OutSuffix, Lane, In2, + In2Lane, } #[derive(Clone, Copy)] @@ -847,6 +848,7 @@ fn gen_aarch64( OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)), Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])), In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])), + In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])), }; let current_fn = if let Some(current_fn) = current_fn.clone() { if link_aarch64.is_some() { @@ -1259,6 +1261,7 @@ fn gen_arm( OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)), Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])), In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])), + In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])), }; let current_aarch64 = current_aarch64 .clone() @@ -2216,6 +2219,8 @@ mod test { suffix = Lane; } else if line.starts_with("in2-suffix") { suffix = In2; + } else if line.starts_with("in2-lane-suffixes") { + suffix = In2Lane; } else if line.starts_with("a = ") { a = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("b = ") { diff --git a/library/stdarch/crates/stdarch-test/src/lib.rs b/library/stdarch/crates/stdarch-test/src/lib.rs index 8f6aa4a267c..cc48a65b25f 100644 --- a/library/stdarch/crates/stdarch-test/src/lib.rs +++ b/library/stdarch/crates/stdarch-test/src/lib.rs @@ -3,7 +3,7 @@ //! This basically just disassembles the current executable and then parses the //! output once globally and then provides the `assert` function which makes //! assertions about the disassembly of a function. -#![feature(test)] // For black_box +#![feature(bench_black_box)] // For black_box #![deny(rust_2018_idioms)] #![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] |
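
For readers skimming the generated output above: the new `*_n_*` intrinsics take the last operand as a plain scalar and broadcast it with `vdup`, while the `*_lane_*`/`*_laneq_*` intrinsics splat one lane of a 64-bit or 128-bit vector via `simd_shuffle`. A minimal usage sketch follows; it is not part of the patch and assumes an AArch64 target (at the time of this commit these intrinsics required a nightly toolchain with the `stdsimd` feature).

```rust
#[cfg(target_arch = "aarch64")]
unsafe fn demo() {
    use core::arch::aarch64::*;

    // Multiply-accumulate by one lane: r = a + b * c[LANE].
    let a = vdupq_n_s16(10);
    let b = vdupq_n_s16(2);
    let c = vset_lane_s16::<1>(3, vdup_n_s16(0)); // only lane 1 is read
    let r = vmlaq_lane_s16::<1>(a, b, c);
    assert_eq!(vgetq_lane_s16::<0>(r), 16); // 10 + 2 * 3

    // Widening multiply-accumulate by a scalar: u32 accumulator + u16 * u16.
    let wide = vmlal_n_u16(vdupq_n_u32(1), vdup_n_u16(2), 3);
    assert_eq!(vgetq_lane_u32::<0>(wide), 7); // 1 + 2 * 3

    // Multiply-subtract by a scalar on floats; on AArch64 this lowers to a
    // separate multiply and subtract, hence the `fmul` in the assert_instr
    // attributes above rather than a fused instruction.
    let f = vmls_n_f32(vdup_n_f32(7.0), vdup_n_f32(2.0), 3.0);
    assert_eq!(vget_lane_f32::<0>(f), 1.0); // 7 - 2 * 3
}
```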

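One more note on the lane bound, since it is easy to miss in the generated code: `static_assert_imm1!`/`static_assert_imm2!`/`static_assert_imm3!` reject out-of-range `LANE` values at compile time, and the `_lane` vs `_laneq` suffix only changes whether `c` is the 64-bit or the 128-bit vector. A hedged illustration, again assuming an AArch64 target:

```rust
#[cfg(target_arch = "aarch64")]
unsafe fn lane_bounds(
    a: core::arch::aarch64::float32x2_t,
) -> core::arch::aarch64::float32x2_t {
    use core::arch::aarch64::*;
    let b = vdup_n_f32(2.0);
    let c2 = vdup_n_f32(3.0);  // float32x2_t: lanes 0..=1
    let c4 = vdupq_n_f32(3.0); // float32x4_t: lanes 0..=3

    let r = vmls_lane_f32::<1>(a, b, c2);  // OK: checked by static_assert_imm1!
    let r = vmls_laneq_f32::<3>(r, b, c4); // OK: checked by static_assert_imm2!
    // vmls_lane_f32::<2>(a, b, c2) would be rejected at compile time.
    r
}
```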