Diffstat (limited to 'library/stdarch')
6 files changed, 448 insertions, 343 deletions
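The largest part of this change reworks the scalar saturating intrinsics in the AArch64 generated.rs: instead of splatting the scalar into a vector with vdup, applying the vector intrinsic, and extracting lane 0, each function now binds directly to the corresponding scalar LLVM intrinsic via link_name. A condensed before/after sketch (attributes trimmed; the llvm.aarch64.neon.* names are the ones the diff introduces, and the extern binding only resolves inside a stdarch-style crate built against LLVM):

// Before: the scalar op routed through the 2-lane vector intrinsic.
pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
    let a: int32x2_t = vdup_n_s32(a);   // splat the scalar into both lanes
    let b: int32x2_t = vdup_n_s32(b);
    simd_extract(vqsub_s32(a, b), 0)    // vector saturating subtract, take lane 0
}

// After: bind the scalar LLVM intrinsic directly, so a single sqsub on
// scalar registers can be selected.
pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i32")]
        fn vqsubs_s32_(a: i32, b: i32) -> i32;
    }
    vqsubs_s32_(a, b)
}

The same pattern is applied to the uqsub/sqadd/uqadd scalars, the scalar sqxtn/uqxtn narrows, the sqrshl/uqrshl and sqshl/uqshl shifts, and srshl/urshl; the stdarch-gen spec changes at the bottom of the diff are what drive this generation.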
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 791e7707bc7..88fe4cb085c 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -3937,26 +3937,6 @@ pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 { /// Saturating subtract #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqsub))] -pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 { - let a: int32x2_t = vdup_n_s32(a); - let b: int32x2_t = vdup_n_s32(b); - simd_extract(vqsub_s32(a, b), 0) -} - -/// Saturating subtract -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqsub))] -pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 { - let a: int64x1_t = vdup_n_s64(a); - let b: int64x1_t = vdup_n_s64(b); - simd_extract(vqsub_s64(a, b), 0) -} - -/// Saturating subtract -#[inline] -#[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqsub))] pub unsafe fn vqsubb_u8(a: u8, b: u8) -> u8 { let a: uint8x8_t = vdup_n_u8(a); @@ -3979,9 +3959,12 @@ pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqsub))] pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 { - let a: uint32x2_t = vdup_n_u32(a); - let b: uint32x2_t = vdup_n_u32(b); - simd_extract(vqsub_u32(a, b), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i32")] + fn vqsubs_u32_(a: u32, b: u32) -> u32; + } + vqsubs_u32_(a, b) } /// Saturating subtract @@ -3989,9 +3972,38 @@ pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqsub))] pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 { - let a: uint64x1_t = vdup_n_u64(a); - let b: uint64x1_t = vdup_n_u64(b); - simd_extract(vqsub_u64(a, b), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i64")] + fn vqsubd_u64_(a: u64, b: u64) -> u64; + } + vqsubd_u64_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqsub))] +pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i32")] + fn vqsubs_s32_(a: i32, b: i32) -> i32; + } + vqsubs_s32_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqsub))] +pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i64")] + fn vqsubd_s64_(a: i64, b: i64) -> i64; + } + vqsubd_s64_(a, b) } /// Reverse bit order @@ -4413,26 +4425,6 @@ pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 { /// Saturating add #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqadd))] -pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 { - let a: int32x2_t = vdup_n_s32(a); - let b: int32x2_t = vdup_n_s32(b); - simd_extract(vqadd_s32(a, b), 0) -} - -/// Saturating add -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqadd))] -pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 { - let a: int64x1_t = vdup_n_s64(a); - let b: int64x1_t = vdup_n_s64(b); - simd_extract(vqadd_s64(a, b), 0) -} - -/// Saturating add -#[inline] -#[target_feature(enable = 
"neon")] #[cfg_attr(test, assert_instr(uqadd))] pub unsafe fn vqaddb_u8(a: u8, b: u8) -> u8 { let a: uint8x8_t = vdup_n_u8(a); @@ -4455,9 +4447,12 @@ pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqadd))] pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 { - let a: uint32x2_t = vdup_n_u32(a); - let b: uint32x2_t = vdup_n_u32(b); - simd_extract(vqadd_u32(a, b), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i32")] + fn vqadds_u32_(a: u32, b: u32) -> u32; + } + vqadds_u32_(a, b) } /// Saturating add @@ -4465,9 +4460,38 @@ pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqadd))] pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 { - let a: uint64x1_t = vdup_n_u64(a); - let b: uint64x1_t = vdup_n_u64(b); - simd_extract(vqadd_u64(a, b), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i64")] + fn vqaddd_u64_(a: u64, b: u64) -> u64; + } + vqaddd_u64_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqadd))] +pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i32")] + fn vqadds_s32_(a: i32, b: i32) -> i32; + } + vqadds_s32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqadd))] +pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i64")] + fn vqaddd_s64_(a: i64, b: i64) -> i64; + } + vqaddd_s64_(a, b) } /// Multiply @@ -5935,14 +5959,6 @@ pub unsafe fn vqmovns_s32(a: i32) -> i16 { /// Saturating extract narrow #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqxtn))] -pub unsafe fn vqmovnd_s64(a: i64) -> i32 { - simd_extract(vqmovn_s64(vdupq_n_s64(a)), 0) -} - -/// Saturating extract narrow -#[inline] -#[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqxtn))] pub unsafe fn vqmovnh_u16(a: u16) -> u8 { simd_extract(vqmovn_u16(vdupq_n_u16(a)), 0) @@ -5959,9 +5975,27 @@ pub unsafe fn vqmovns_u32(a: u32) -> u16 { /// Saturating extract narrow #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqxtn))] +pub unsafe fn vqmovnd_s64(a: i64) -> i32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64")] + fn vqmovnd_s64_(a: i64) -> i32; + } + vqmovnd_s64_(a) +} + +/// Saturating extract narrow +#[inline] +#[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqxtn))] pub unsafe fn vqmovnd_u64(a: u64) -> u32 { - simd_extract(vqmovn_u64(vdupq_n_u64(a)), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64")] + fn vqmovnd_u64_(a: u64) -> u32; + } + vqmovnd_u64_(a) } /// Signed saturating extract narrow @@ -6232,80 +6266,92 @@ pub unsafe fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqrshl))] -pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 { - let a: int8x8_t = vdup_n_s8(a); - let b: int8x8_t = vdup_n_s8(b); - simd_extract(vqrshl_s8(a, b), 0) +pub unsafe fn 
vqrshls_s32(a: i32, b: i32) -> i32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i32")] + fn vqrshls_s32_(a: i32, b: i32) -> i32; + } + vqrshls_s32_(a, b) } /// Signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqrshl))] -pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 { - let a: int16x4_t = vdup_n_s16(a); - let b: int16x4_t = vdup_n_s16(b); - simd_extract(vqrshl_s16(a, b), 0) +pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i64")] + fn vqrshld_s64_(a: i64, b: i64) -> i64; + } + vqrshld_s64_(a, b) } /// Signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqrshl))] -pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 { - let a: int32x2_t = vdup_n_s32(a); - let b: int32x2_t = vdup_n_s32(b); - simd_extract(vqrshl_s32(a, b), 0) +pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 { + let a: int8x8_t = vdup_n_s8(a); + let b: int8x8_t = vdup_n_s8(b); + simd_extract(vqrshl_s8(a, b), 0) } /// Signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqrshl))] -pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 { - let a: int64x1_t = vdup_n_s64(a); - let b: int64x1_t = vdup_n_s64(b); - simd_extract(vqrshl_s64(a, b), 0) +pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 { + let a: int16x4_t = vdup_n_s16(a); + let b: int16x4_t = vdup_n_s16(b); + simd_extract(vqrshl_s16(a, b), 0) } /// Unsigned signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqrshl))] -pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 { - let a: uint8x8_t = vdup_n_u8(a); - let b: int8x8_t = vdup_n_s8(b); - simd_extract(vqrshl_u8(a, b), 0) +pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i32")] + fn vqrshls_u32_(a: u32, b: i32) -> u32; + } + vqrshls_u32_(a, b) } /// Unsigned signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqrshl))] -pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 { - let a: uint16x4_t = vdup_n_u16(a); - let b: int16x4_t = vdup_n_s16(b); - simd_extract(vqrshl_u16(a, b), 0) +pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i64")] + fn vqrshld_u64_(a: u64, b: i64) -> u64; + } + vqrshld_u64_(a, b) } /// Unsigned signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqrshl))] -pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 { - let a: uint32x2_t = vdup_n_u32(a); - let b: int32x2_t = vdup_n_s32(b); - simd_extract(vqrshl_u32(a, b), 0) +pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 { + let a: uint8x8_t = vdup_n_u8(a); + let b: int8x8_t = vdup_n_s8(b); + simd_extract(vqrshl_u8(a, b), 0) } /// Unsigned signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqrshl))] -pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 { - let a: uint64x1_t = vdup_n_u64(a); - let b: int64x1_t = vdup_n_s64(b); - simd_extract(vqrshl_u64(a, b), 0) +pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 { + let a: uint16x4_t 
= vdup_n_u16(a); + let b: int16x4_t = vdup_n_s16(b); + simd_extract(vqrshl_u16(a, b), 0) } /// Signed saturating rounded shift right narrow @@ -6501,6 +6547,19 @@ pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqshl))] +pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.i64")] + fn vqshld_s64_(a: i64, b: i64) -> i64; + } + vqshld_s64_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshl))] pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 { let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b)); simd_extract(c, 0) @@ -6524,13 +6583,17 @@ pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 { simd_extract(c, 0) } -/// Signed saturating shift left +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl))] -pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 { - let c: int64x1_t = vqshl_s64(vdup_n_s64(a), vdup_n_s64(b)); - simd_extract(c, 0) +#[cfg_attr(test, assert_instr(uqshl))] +pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.i64")] + fn vqshld_u64_(a: u64, b: i64) -> u64; + } + vqshld_u64_(a, b) } /// Unsigned saturating shift left @@ -6560,15 +6623,6 @@ pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 { simd_extract(c, 0) } -/// Unsigned saturating shift left -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl))] -pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 { - let c: uint64x1_t = vqshl_u64(vdup_n_u64(a), vdup_n_s64(b)); - simd_extract(c, 0) -} - /// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] @@ -6654,9 +6708,14 @@ pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqshrn, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 { - static_assert!(N : i32 where N >= 1 && N <= 8); - simd_extract(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0) +pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.i32")] + fn vqshrnd_n_s64_(a: i64, n: i32) -> i32; + } + vqshrnd_n_s64_(a, N) } /// Signed saturating shift right narrow @@ -6664,9 +6723,9 @@ pub unsafe fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqshrn, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 { - static_assert!(N : i32 where N >= 1 && N <= 16); - simd_extract(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0) +pub unsafe fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_extract(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0) } /// Signed saturating shift right narrow @@ -6674,9 +6733,9 @@ pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqshrn, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 { - static_assert!(N : i32 where N >= 1 && N <= 32); - 
simd_extract(vqshrn_n_s64::<N>(vdupq_n_s64(a)), 0) +pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_extract(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0) } /// Signed saturating shift right narrow @@ -6714,9 +6773,14 @@ pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqshrn, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 { - static_assert!(N : i32 where N >= 1 && N <= 8); - simd_extract(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0) +pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.i32")] + fn vqshrnd_n_u64_(a: u64, n: i32) -> u32; + } + vqshrnd_n_u64_(a, N) } /// Unsigned saturating shift right narrow @@ -6724,9 +6788,9 @@ pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqshrn, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 { - static_assert!(N : i32 where N >= 1 && N <= 16); - simd_extract(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0) +pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_extract(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0) } /// Unsigned saturating shift right narrow @@ -6734,9 +6798,9 @@ pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqshrn, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 { - static_assert!(N : i32 where N >= 1 && N <= 32); - simd_extract(vqshrn_n_u64::<N>(vdupq_n_u64(a)), 0) +pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_extract(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0) } /// Unsigned saturating shift right narrow @@ -7654,7 +7718,12 @@ pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(srshl))] pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 { - transmute(vrshl_s64(transmute(a), transmute(b))) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.i64")] + fn vrshld_s64_(a: i64, b: i64) -> i64; + } + vrshld_s64_(a, b) } /// Unsigned rounding shift left @@ -7662,7 +7731,12 @@ pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(urshl))] pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 { - transmute(vrshl_u64(transmute(a), transmute(b))) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.i64")] + fn vrshld_u64_(a: u64, b: i64) -> u64; + } + vrshld_u64_(a, b) } /// Signed rounding shift right @@ -7748,23 +7822,23 @@ pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> u /// Signed rounding shift right and accumulate. 
#[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] +#[cfg_attr(test, assert_instr(srsra, N = 2))] #[rustc_legacy_const_generics(2)] pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 { static_assert!(N : i32 where N >= 1 && N <= 64); - let b: int64x1_t = vrshr_n_s64::<N>(transmute(b)); - transmute(simd_add(transmute(a), b)) + let b: i64 = vrshrd_n_s64::<N>(b); + a + b } /// Ungisned rounding shift right and accumulate. #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] +#[cfg_attr(test, assert_instr(ursra, N = 2))] #[rustc_legacy_const_generics(2)] pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 { static_assert!(N : i32 where N >= 1 && N <= 64); - let b: uint64x1_t = vrshr_n_u64::<N>(transmute(b)); - transmute(simd_add(transmute(a), b)) + let b: u64 = vrshrd_n_u64::<N>(b); + a + b } /// Insert vector element from another vector element @@ -12090,24 +12164,6 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqsubs_s32() { - let a: i32 = 42; - let b: i32 = 1; - let e: i32 = 41; - let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubd_s64() { - let a: i64 = 42; - let b: i64 = 1; - let e: i64 = 41; - let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] unsafe fn test_vqsubb_u8() { let a: u8 = 42; let b: u8 = 1; @@ -12144,6 +12200,24 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vqsubs_s32() { + let a: i32 = 42; + let b: i32 = 1; + let e: i32 = 41; + let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubd_s64() { + let a: i64 = 42; + let b: i64 = 1; + let e: i64 = 41; + let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vrbit_s8() { let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112); @@ -12418,24 +12492,6 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqadds_s32() { - let a: i32 = 42; - let b: i32 = 1; - let e: i32 = 43; - let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddd_s64() { - let a: i64 = 42; - let b: i64 = 1; - let e: i64 = 43; - let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] unsafe fn test_vqaddb_u8() { let a: u8 = 42; let b: u8 = 1; @@ -12472,6 +12528,24 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vqadds_s32() { + let a: i32 = 42; + let b: i32 = 1; + let e: i32 = 43; + let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqaddd_s64() { + let a: i64 = 42; + let b: i64 = 1; + let e: i64 = 43; + let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vmul_f64() { let a: f64 = 1.0; let b: f64 = 2.0; @@ -13737,14 +13811,6 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqmovnd_s64() { - let a: i64 = 1; - let e: i32 = 1; - let r: i32 = transmute(vqmovnd_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] unsafe fn test_vqmovnh_u16() { let a: u16 = 1; let e: u8 = 1; 
@@ -13761,6 +13827,14 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vqmovnd_s64() { + let a: i64 = 1; + let e: i32 = 1; + let r: i32 = transmute(vqmovnd_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vqmovnd_u64() { let a: u64 = 1; let e: u32 = 1; @@ -14048,6 +14122,24 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vqrshls_s32() { + let a: i32 = 2; + let b: i32 = 2; + let e: i32 = 8; + let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqrshld_s64() { + let a: i64 = 2; + let b: i64 = 2; + let e: i64 = 8; + let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vqrshlb_s8() { let a: i8 = 1; let b: i8 = 2; @@ -14066,20 +14158,20 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqrshls_s32() { - let a: i32 = 1; + unsafe fn test_vqrshls_u32() { + let a: u32 = 2; let b: i32 = 2; - let e: i32 = 4; - let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b))); + let e: u32 = 8; + let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqrshld_s64() { - let a: i64 = 1; + unsafe fn test_vqrshld_u64() { + let a: u64 = 2; let b: i64 = 2; - let e: i64 = 4; - let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b))); + let e: u64 = 8; + let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b))); assert_eq!(r, e); } @@ -14102,24 +14194,6 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqrshls_u32() { - let a: u32 = 1; - let b: i32 = 2; - let e: u32 = 4; - let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshld_u64() { - let a: u64 = 1; - let b: i64 = 2; - let e: u64 = 4; - let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] unsafe fn test_vqrshrnh_n_s16() { let a: i16 = 4; let e: i8 = 1; @@ -14273,6 +14347,15 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vqshld_s64() { + let a: i64 = 0; + let b: i64 = 2; + let e: i64 = 0; + let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vqshlb_s8() { let a: i8 = 1; let b: i8 = 2; @@ -14300,11 +14383,11 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqshld_s64() { - let a: i64 = 1; + unsafe fn test_vqshld_u64() { + let a: u64 = 0; let b: i64 = 2; - let e: i64 = 4; - let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b))); + let e: u64 = 0; + let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b))); assert_eq!(r, e); } @@ -14336,15 +14419,6 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqshld_u64() { - let a: u64 = 1; - let b: i64 = 2; - let e: u64 = 4; - let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] unsafe fn test_vqshlb_n_s8() { let a: i8 = 1; let e: i8 = 4; @@ -14409,6 +14483,14 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vqshrnd_n_s64() { + let a: i64 = 0; + let e: i32 = 0; + let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vqshrnh_n_s16() { let a: i16 = 4; let e: i8 = 1; @@ -14425,14 +14507,6 @@ mod test { } 
#[simd_test(enable = "neon")] - unsafe fn test_vqshrnd_n_s64() { - let a: i64 = 4; - let e: i32 = 1; - let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] unsafe fn test_vqshrn_high_n_s16() { let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11); let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60); @@ -14460,6 +14534,14 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vqshrnd_n_u64() { + let a: u64 = 0; + let e: u32 = 0; + let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vqshrnh_n_u16() { let a: u16 = 4; let e: u8 = 1; @@ -14476,14 +14558,6 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqshrnd_n_u64() { - let a: u64 = 4; - let e: u32 = 1; - let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] unsafe fn test_vqshrn_high_n_u16() { let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11); let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60); diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs index e29c1b36d25..9097d269893 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs @@ -1184,9 +1184,7 @@ pub unsafe fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(add))] pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 { - let a: int64x1_t = transmute(a); - let b: int64x1_t = transmute(b); - simd_extract(simd_add(a, b), 0) + a.wrapping_add(b) } /// Vector add. @@ -1194,9 +1192,7 @@ pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(add))] pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 { - let a: uint64x1_t = transmute(a); - let b: uint64x1_t = transmute(b); - simd_extract(simd_add(a, b), 0) + a.wrapping_add(b) } /// Horizontal vector max. 
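For the scalar tests relocated above, the expected results follow ordinary saturating and wrapping integer arithmetic, so a quick behavioural reference can be written with the standard library alone (this mirrors the semantics only; it is not how the intrinsics are implemented):

// Behavioural reference for the scalar saturating/wrapping ops exercised
// by the moved tests; saturating_* and wrapping_add are std methods.
fn main() {
    // vqadds_s32 / vqsubs_s32: signed 32-bit saturating add and subtract.
    assert_eq!(42i32.saturating_add(1), 43);
    assert_eq!(i32::MAX.saturating_add(1), i32::MAX);
    assert_eq!(42i32.saturating_sub(1), 41);
    assert_eq!(i32::MIN.saturating_sub(1), i32::MIN);

    // vqaddd_u64 / vqsubd_u64: unsigned 64-bit saturating add and subtract.
    assert_eq!(u64::MAX.saturating_add(1), u64::MAX);
    assert_eq!(0u64.saturating_sub(1), 0);

    // vaddd_s64 / vaddd_u64 are now plain wrapping adds (a.wrapping_add(b)).
    assert_eq!(1i64.wrapping_add(2), 3);
    assert_eq!(i64::MAX.wrapping_add(1), i64::MIN);
}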
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index 835a3aba749..0387799f6f4 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -13070,7 +13070,7 @@ pub unsafe fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t { - static_assert_imm4!(LANE); + static_assert_imm2!(LANE); simd_insert(b, LANE as u32, a) } @@ -13082,7 +13082,7 @@ pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t { - static_assert_imm5!(LANE); + static_assert_imm1!(LANE); simd_insert(b, LANE as u32, a) } @@ -13094,7 +13094,7 @@ pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_s64<const LANE: i32>(a: i64, b: int64x1_t) -> int64x1_t { - static_assert_imm6!(LANE); + static_assert!(LANE : i32 where LANE == 0); simd_insert(b, LANE as u32, a) } @@ -13118,7 +13118,7 @@ pub unsafe fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t { - static_assert_imm4!(LANE); + static_assert_imm2!(LANE); simd_insert(b, LANE as u32, a) } @@ -13130,7 +13130,7 @@ pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_ #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_t { - static_assert_imm5!(LANE); + static_assert_imm1!(LANE); simd_insert(b, LANE as u32, a) } @@ -13142,7 +13142,7 @@ pub unsafe fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_ #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_u64<const LANE: i32>(a: u64, b: uint64x1_t) -> uint64x1_t { - static_assert_imm6!(LANE); + static_assert!(LANE : i32 where LANE == 0); simd_insert(b, LANE as u32, a) } @@ -13166,7 +13166,7 @@ pub unsafe fn vset_lane_p8<const LANE: i32>(a: p8, b: poly8x8_t) -> poly8x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_t { - static_assert_imm4!(LANE); + static_assert_imm2!(LANE); simd_insert(b, LANE as u32, a) } @@ -13178,7 +13178,7 @@ pub unsafe fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_ #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_t { - static_assert_imm6!(LANE); + static_assert!(LANE : i32 where LANE == 0); simd_insert(b, LANE as u32, a) } @@ -13190,7 +13190,7 @@ pub unsafe fn vset_lane_p64<const 
LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_ #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t { - static_assert_imm3!(LANE); + static_assert_imm4!(LANE); simd_insert(b, LANE as u32, a) } @@ -13202,7 +13202,7 @@ pub unsafe fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t { - static_assert_imm4!(LANE); + static_assert_imm3!(LANE); simd_insert(b, LANE as u32, a) } @@ -13214,7 +13214,7 @@ pub unsafe fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t { - static_assert_imm5!(LANE); + static_assert_imm2!(LANE); simd_insert(b, LANE as u32, a) } @@ -13226,7 +13226,7 @@ pub unsafe fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t { - static_assert_imm6!(LANE); + static_assert_imm1!(LANE); simd_insert(b, LANE as u32, a) } @@ -13238,7 +13238,7 @@ pub unsafe fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t { - static_assert_imm3!(LANE); + static_assert_imm4!(LANE); simd_insert(b, LANE as u32, a) } @@ -13250,7 +13250,7 @@ pub unsafe fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8_t { - static_assert_imm4!(LANE); + static_assert_imm3!(LANE); simd_insert(b, LANE as u32, a) } @@ -13262,7 +13262,7 @@ pub unsafe fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t { - static_assert_imm5!(LANE); + static_assert_imm2!(LANE); simd_insert(b, LANE as u32, a) } @@ -13274,7 +13274,7 @@ pub unsafe fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t { - static_assert_imm6!(LANE); + static_assert_imm1!(LANE); simd_insert(b, LANE as u32, a) } @@ -13286,7 +13286,7 @@ pub unsafe fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t { - static_assert_imm3!(LANE); + static_assert_imm4!(LANE); simd_insert(b, LANE as u32, a) } @@ -13298,7 +13298,7 @@ pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> 
poly8x16_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t { - static_assert_imm4!(LANE); + static_assert_imm3!(LANE); simd_insert(b, LANE as u32, a) } @@ -13310,7 +13310,7 @@ pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2_t { - static_assert_imm6!(LANE); + static_assert_imm1!(LANE); simd_insert(b, LANE as u32, a) } @@ -21006,144 +21006,144 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vqrshl_s8() { - let a: i8x8 = i8x8::new(-128, 0x7F, 2, 3, 4, 5, 6, 7); + let a: i8x8 = i8x8::new(2, -128, 0x7F, 3, 4, 5, 6, 7); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x8 = i8x8::new(-128, 0x7F, 8, 12, 16, 20, 24, 28); + let e: i8x8 = i8x8::new(8, -128, 0x7F, 12, 16, 20, 24, 28); let r: i8x8 = transmute(vqrshl_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_s8() { - let a: i8x16 = i8x16::new(-128, 0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let a: i8x16 = i8x16::new(2, -128, 0x7F, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x16 = i8x16::new(-128, 0x7F, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); + let e: i8x16 = i8x16::new(8, -128, 0x7F, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); let r: i8x16 = transmute(vqrshlq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_s16() { - let a: i16x4 = i16x4::new(-32768, 0x7F_FF, 2, 3); + let a: i16x4 = i16x4::new(2, -32768, 0x7F_FF, 3); let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(-32768, 0x7F_FF, 8, 12); + let e: i16x4 = i16x4::new(8, -32768, 0x7F_FF, 12); let r: i16x4 = transmute(vqrshl_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x7F_FF, 2, 3, 4, 5, 6, 7); + let a: i16x8 = i16x8::new(2, -32768, 0x7F_FF, 3, 4, 5, 6, 7); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(-32768, 0x7F_FF, 8, 12, 16, 20, 24, 28); + let e: i16x8 = i16x8::new(8, -32768, 0x7F_FF, 12, 16, 20, 24, 28); let r: i16x8 = transmute(vqrshlq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); + let a: i32x2 = i32x2::new(2, -2147483648); let b: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); + let e: i32x2 = i32x2::new(8, -2147483648); let r: i32x2 = transmute(vqrshl_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 2, 3); + let a: i32x4 = i32x4::new(2, -2147483648, 0x7F_FF_FF_FF, 3); let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 8, 12); + let e: i32x4 = i32x4::new(8, -2147483648, 0x7F_FF_FF_FF, 12); let r: i32x4 = transmute(vqrshlq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); + let a: 
i64x1 = i64x1::new(2); let b: i64x1 = i64x1::new(2); - let e: i64x1 = i64x1::new(-9223372036854775808); + let e: i64x1 = i64x1::new(8); let r: i64x1 = transmute(vqrshl_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); + let a: i64x2 = i64x2::new(2, -9223372036854775808); let b: i64x2 = i64x2::new(2, 2); - let e: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); + let e: i64x2 = i64x2::new(8, -9223372036854775808); let r: i64x2 = transmute(vqrshlq_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_u8() { - let a: u8x8 = u8x8::new(0, 0xFF, 2, 3, 4, 5, 6, 7); + let a: u8x8 = u8x8::new(2, 0, 0xFF, 3, 4, 5, 6, 7); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x8 = u8x8::new(0, 0xFF, 8, 12, 16, 20, 24, 28); + let e: u8x8 = u8x8::new(8, 0, 0xFF, 12, 16, 20, 24, 28); let r: u8x8 = transmute(vqrshl_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_u8() { - let a: u8x16 = u8x16::new(0, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let a: u8x16 = u8x16::new(2, 0, 0xFF, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x16 = u8x16::new(0, 0xFF, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); + let e: u8x16 = u8x16::new(8, 0, 0xFF, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); let r: u8x16 = transmute(vqrshlq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_u16() { - let a: u16x4 = u16x4::new(0, 0xFF_FF, 2, 3); + let a: u16x4 = u16x4::new(2, 0, 0xFF_FF, 3); let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: u16x4 = u16x4::new(0, 0xFF_FF, 8, 12); + let e: u16x4 = u16x4::new(8, 0, 0xFF_FF, 12); let r: u16x4 = transmute(vqrshl_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_u16() { - let a: u16x8 = u16x8::new(0, 0xFF_FF, 2, 3, 4, 5, 6, 7); + let a: u16x8 = u16x8::new(2, 0, 0xFF_FF, 3, 4, 5, 6, 7); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u16x8 = u16x8::new(0, 0xFF_FF, 8, 12, 16, 20, 24, 28); + let e: u16x8 = u16x8::new(8, 0, 0xFF_FF, 12, 16, 20, 24, 28); let r: u16x8 = transmute(vqrshlq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_u32() { - let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let a: u32x2 = u32x2::new(2, 0); let b: i32x2 = i32x2::new(2, 2); - let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let e: u32x2 = u32x2::new(8, 0); let r: u32x2 = transmute(vqrshl_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_u32() { - let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 2, 3); + let a: u32x4 = u32x4::new(2, 0, 0xFF_FF_FF_FF, 3); let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 8, 12); + let e: u32x4 = u32x4::new(8, 0, 0xFF_FF_FF_FF, 12); let r: u32x4 = transmute(vqrshlq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_u64() { - let a: u64x1 = u64x1::new(0); + let a: u64x1 = u64x1::new(2); let b: i64x1 = i64x1::new(2); - let e: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(8); let r: u64x1 = transmute(vqrshl_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = 
"neon")] unsafe fn test_vqrshlq_u64() { - let a: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let a: u64x2 = u64x2::new(2, 0); let b: i64x2 = i64x2::new(2, 2); - let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let e: u64x2 = u64x2::new(8, 0); let r: u64x2 = transmute(vqrshlq_u64(transmute(a), transmute(b))); assert_eq!(r, e); } diff --git a/library/stdarch/crates/core_arch/src/x86/macros.rs b/library/stdarch/crates/core_arch/src/x86/macros.rs index b9550ce79c4..e686e65b303 100644 --- a/library/stdarch/crates/core_arch/src/x86/macros.rs +++ b/library/stdarch/crates/core_arch/src/x86/macros.rs @@ -5,7 +5,10 @@ pub(crate) struct ValidateConstRound<const IMM: i32>; impl<const IMM: i32> ValidateConstRound<IMM> { pub(crate) const VALID: () = { - assert!(IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, "Invalid IMM value"); + assert!( + IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, + "Invalid IMM value" + ); }; } @@ -70,7 +73,10 @@ macro_rules! static_assert_imm_u8 { pub(crate) struct ValidateConstGatherScale<const SCALE: i32>; impl<const SCALE: i32> ValidateConstGatherScale<SCALE> { pub(crate) const VALID: () = { - assert!(SCALE == 1 || SCALE == 2 || SCALE == 4 || SCALE == 8, "Invalid SCALE value"); + assert!( + SCALE == 1 || SCALE == 2 || SCALE == 4 || SCALE == 8, + "Invalid SCALE value" + ); }; } diff --git a/library/stdarch/crates/core_arch/src/x86_64/macros.rs b/library/stdarch/crates/core_arch/src/x86_64/macros.rs index 9e3faf444d3..a3ea0e82163 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/macros.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/macros.rs @@ -5,7 +5,10 @@ pub(crate) struct ValidateConstRound<const IMM: i32>; impl<const IMM: i32> ValidateConstRound<IMM> { pub(crate) const VALID: () = { - assert!(IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, "Invalid IMM value"); + assert!( + IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, + "Invalid IMM value" + ); }; } diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index 4b192069b6f..825ecf51155 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -1843,9 +1843,23 @@ b = 1 validate 41 aarch64 = sqsub -generate i8, i16, i32, i64 +generate i8, i16 aarch64 = uqsub -generate u8, u16, u32, u64 +generate u8, u16 + +/// Saturating subtract +name = vqsub +a = 42 +b = 1 +validate 41 + +aarch64 = uqsub +link-aarch64 = uqsub._EXT_ +generate u32, u64 + +aarch64 = sqsub +link-aarch64 = sqsub._EXT_ +generate i32, i64 /// Halving add name = vhadd @@ -1999,9 +2013,23 @@ b = 1 validate 43 aarch64 = sqadd -generate i8, i16, i32, i64 +generate i8, i16 aarch64 = uqadd -generate u8, u16, u32, u64 +generate u8, u16 + +/// Saturating add +name = vqadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 + +aarch64 = uqadd +link-aarch64 = uqadd._EXT_ +generate u32, u64 + +aarch64 = sqadd +link-aarch64 = sqadd._EXT_ +generate i32, i64 /// Multiply name = vmul @@ -3383,9 +3411,22 @@ a = 1 validate 1 aarch64 = sqxtn -generate i16:i8, i32:i16, i64:i32 +generate i16:i8, i32:i16 aarch64 = uqxtn -generate u16:u8, u32:u16, u64:u32 +generate u16:u8, u32:u16 + +/// Saturating extract narrow +name = vqmovn +a = 1 +validate 1 + +aarch64 = sqxtn +link-aarch64 = scalar.sqxtn._EXT2_._EXT_ +generate i64:i32 + +aarch64 = uqxtn +link-aarch64 
= scalar.uqxtn._EXT2_._EXT_ +generate u64:u32 /// Signed saturating extract narrow name = vqmovn_high @@ -3609,12 +3650,13 @@ generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i3 /// Signed saturating rounding shift left name = vqrshl -a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -validate MIN, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 +validate 8, MIN, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = sqrshl link-aarch64 = sqrshl._EXT_ +generate i32, i64 arm = vqrshl link-arm = vqrshifts._EXT_ @@ -3630,17 +3672,18 @@ b = 2 validate 4 aarch64 = sqrshl -generate i8, i16, i32, i64 +generate i8, i16 /// Unsigned signed saturating rounding shift left name = vqrshl out-suffix -a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -validate 0, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 +validate 8, 0, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = uqrshl link-aarch64 = uqrshl._EXT_ +generate u32:i32:u32, u64:i64:u64 arm = vqrshl link-arm = vqrshiftu._EXT_ @@ -3658,7 +3701,7 @@ b = 2 validate 4 aarch64 = uqrshl -generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64 +generate u8:i8:u8, u16:i16:u16 /// Signed saturating rounded shift right narrow name = vqrshrn @@ -3806,6 +3849,7 @@ validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = sqshl link-aarch64 = sqshl._EXT_ +generate i64 arm = vqshl link-arm = vqshifts._EXT_ @@ -3820,7 +3864,7 @@ b = 2 validate 4 aarch64 = sqshl -generate i8, i16, i32, i64 +generate i8, i16, i32 /// Unsigned saturating shift left name = vqshl @@ -3831,6 +3875,7 @@ validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = uqshl link-aarch64 = uqshl._EXT_ +generate u64:i64:u64 arm = vqshl link-arm = vqshiftu._EXT_ @@ -3847,7 +3892,7 @@ b = 2 validate 4 aarch64 = uqshl -generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64 +generate u8:i8:u8, u16:i16:u16, u32:i32:u32 /// Signed saturating shift left name = vqshl @@ -3915,6 +3960,7 @@ validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = sqshrn link-aarch64 = sqshrn._EXT2_ const-aarch64 = N +generate i64:i32 arm = vqshrn link-arm = vqshiftns._EXT2_ @@ -3932,7 +3978,7 @@ n = 2 validate 1 aarch64 = sqshrn -generate i16:i8, i32:i16, i64:i32 +generate i16:i8, i32:i16 /// Signed saturating shift right narrow name = vqshrn_high @@ -3960,6 +4006,7 @@ validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = uqshrn link-aarch64 = uqshrn._EXT2_ const-aarch64 = N +generate u64:u32 arm = vqshrn link-arm = vqshiftnu._EXT2_ @@ -3977,7 +4024,7 @@ n = 2 validate 1 aarch64 = uqshrn -generate u16:u8, u32:u16, u64:u32 +generate u16:u8, u32:u16 /// Unsigned saturating shift right narrow name = vqshrn_high @@ -4261,21 +4308,12 @@ validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 aarch64 = srshl link-aarch64 = srshl._EXT_ +generate i64 arm = vrshl link-arm = vrshifts._EXT_ generate int*_t, int64x*_t -/// Signed rounding shift left -name = vrshl -multi_fn = transmute, {vrshl-in_ntt-noext, transmute(a), transmute(b)} -a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 -b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 - -aarch64 = srshl -generate i64 - /// Unsigned rounding shift left 
name = vrshl out-suffix @@ -4285,23 +4323,13 @@ validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 aarch64 = urshl link-aarch64 = urshl._EXT_ +generate u64:i64:u64 arm = vrshl link-arm = vrshiftu._EXT_ generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t -/// Unsigned rounding shift left -name = vrshl -out-suffix -multi_fn = transmute, {vrshl-out_ntt-noext, transmute(a), transmute(b)} -a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 -b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 - -aarch64 = urshl -generate u64:i64:u64 - /// Signed rounding shift right name = vrshr n-suffix @@ -4438,15 +4466,14 @@ name = vrsra n-suffix constn = N multi_fn = static_assert-N-1-bits -multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b) -multi_fn = transmute, {simd_add, transmute(a), b} +multi_fn = vrshr-nself-::<N>, b:in_t, b +multi_fn = a + b a = 1 b = 4 n = 2 validate 2 -// We use "nop" here to skip the instruction test, since it cannot be optimized correctly. -aarch64 = nop +aarch64 = srsra generate i64 /// Ungisned rounding shift right and accumulate. @@ -4454,21 +4481,20 @@ name = vrsra n-suffix constn = N multi_fn = static_assert-N-1-bits -multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b) -multi_fn = transmute, {simd_add, transmute(a), b} +multi_fn = vrshr-nself-::<N>, b:in_t, b +multi_fn = a + b a = 1 b = 4 n = 2 validate 2 -// We use "nop" here to skip the instruction test, since it cannot be optimized correctly. -aarch64 = nop +aarch64 = ursra generate u64 /// Insert vector element from another vector element name = vset_lane constn = LANE -multi_fn = static_assert_imm-in_bits_exp_len-LANE +multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_insert, b, LANE as u32, a a = 1 b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 @@ -4490,7 +4516,7 @@ generate p64:poly64x1_t:poly64x1_t name = vsetq_lane no-q constn = LANE -multi_fn = static_assert_imm-in_bits_exp_len-LANE +multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_insert, b, LANE as u32, a a = 1 b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 @@ -4547,10 +4573,10 @@ a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 -arm = vshl -link-arm = vshifts._EXT_ aarch64 = sshl link-aarch64 = sshl._EXT_ +arm = vshl +link-arm = vshifts._EXT_ generate int*_t, int64x*_t /// Signed Shift left @@ -4570,10 +4596,10 @@ a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 -arm = vshl -link-arm = vshiftu._EXT_ aarch64 = ushl link-aarch64 = ushl._EXT_ +arm = vshl +link-arm = vshiftu._EXT_ generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t |
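The arm_shared changes also tighten the lane-index assertions on vset_lane/vsetq_lane so the bound matches the vector's lane count: a 4-lane vector only admits LANE values 0..=3 (two immediate bits), so vset_lane_s16 moves from an imm4 to an imm2 check, and the 1-lane 64-bit variants assert LANE == 0. A sketch of that kind of compile-time bound, using a hypothetical AssertLaneInBounds helper in place of stdarch's static_assert_imm* macros but following the same associated-const pattern as ValidateConstRound above:

// Hypothetical stand-in for a static_assert_imm2!-style check: a LANE index
// for an N-lane vector must satisfy 0 <= LANE < N.
struct AssertLaneInBounds<const LANE: i32, const LANES: i32>;

impl<const LANE: i32, const LANES: i32> AssertLaneInBounds<LANE, LANES> {
    const OK: () = {
        assert!(LANE >= 0 && LANE < LANES, "lane index out of range");
    };
}

// Sketch of a 4-lane insert: LANE must be 0..=3, which is why the diff
// changes vset_lane_s16 from an imm4 to an imm2 assertion.
fn set_lane_i16x4<const LANE: i32>(a: i16, mut b: [i16; 4]) -> [i16; 4] {
    let _: () = AssertLaneInBounds::<LANE, 4>::OK;
    b[LANE as usize] = a;
    b
}

fn main() {
    assert_eq!(set_lane_i16x4::<1>(9, [0, 2, 3, 4]), [0, 9, 3, 4]);
    // set_lane_i16x4::<4>(9, [0, 2, 3, 4]); // rejected by the const assertion
}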
