| author | Sparrow Li <liyuan179@huawei.com> | 2021-04-12 21:08:26 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-04-12 14:08:26 +0100 |
| commit | 88a5de08cba5e9010bca68e238bca4c2ac4161df (patch) | |
| tree | fbe1d621604d465685b48dd00aee79fe8accdd0b /library/stdarch/crates | |
| parent | b411a5c3751019ebe25da37db3409f53b0ccf9ee (diff) | |
| download | rust-88a5de08cba5e9010bca68e238bca4c2ac4161df.tar.gz rust-88a5de08cba5e9010bca68e238bca4c2ac4161df.zip | |
Allow primitive types in the code generator and add vdup instructions (#1114)
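As a quick illustration of the lane intrinsics this commit generates, the sketch below broadcasts one lane of a vector and reads a lane back as a scalar. It is a minimal sketch, not part of the commit: it assumes a nightly toolchain on an AArch64 target with the unstable stdarch feature gate (assumed to be `stdsimd` here) enabled, and it uses the pre-existing `vld1_s16` load intrinsic alongside the newly added `vdup_lane_s16` and `vduph_lane_s16`.

```rust
// Minimal sketch (assumption): nightly Rust, an aarch64 target, and the
// unstable stdarch feature gate enabled so the NEON intrinsics are in scope.
#![feature(stdsimd)]

#[cfg(target_arch = "aarch64")]
unsafe fn broadcast_lane_demo() {
    use core::arch::aarch64::*;

    // Load four i16 lanes from memory (pre-existing intrinsic).
    let data: [i16; 4] = [10, 20, 30, 40];
    let v: int16x4_t = vld1_s16(data.as_ptr());

    // New in this commit: broadcast lane 2 (value 30) into every lane.
    let all_30: int16x4_t = vdup_lane_s16::<2>(v);

    // Also new: extract a single lane of the result as a scalar i16.
    assert_eq!(vduph_lane_s16::<0>(all_30), 30);
    assert_eq!(vduph_lane_s16::<3>(all_30), 30);
}
```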
Diffstat (limited to 'library/stdarch/crates')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs | 576 |
| -rw-r--r-- | library/stdarch/crates/core_arch/src/arm/neon/generated.rs | 880 |
| -rw-r--r-- | library/stdarch/crates/stdarch-gen/neon.spec | 142 |
| -rw-r--r-- | library/stdarch/crates/stdarch-gen/src/main.rs | 202 |
| -rw-r--r-- | library/stdarch/crates/stdarch-test/src/lib.rs | 8 |
5 files changed, 1770 insertions, 38 deletions
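The "primitive types" half of this change teaches stdarch-gen to treat scalar types such as `i8` or `f64` as one-lane outputs, so it can name and emit the scalar lane-extract intrinsics. The sketch below is a distilled, standalone copy of the suffix mapping added to `type_to_suffix` in `stdarch-gen/src/main.rs` (visible in the diff further down); it is an illustration only, not the generator itself.

```rust
// Distilled illustration of the new primitive-type naming rule: the element
// size picks the register prefix (b/h/s/d) and the element type picks the
// suffix, yielding names like vdupb_lane_s8 or vdupd_laneq_f64.
fn scalar_suffix(t: &str) -> &str {
    match t {
        "i8" => "b_s8",
        "i16" => "h_s16",
        "i32" => "s_s32",
        "i64" => "d_s64",
        "u8" => "b_u8",
        "u16" => "h_u16",
        "u32" => "s_u32",
        "u64" => "d_u64",
        "f32" => "s_f32",
        "f64" => "d_f64",
        "p8" => "b_p8",
        "p16" => "h_p16",
        _ => panic!("unknown type: {}", t),
    }
}
```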
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 5aed80071fd..c2b657d3d9d 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -1664,6 +1664,326 @@ pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { vcvtpq_u64_f64_(a) } +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t { + static_assert_imm1!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x2_t { + static_assert!(N : i32 where N == 0); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_f64<const N: i32>(a: float64x2_t) -> float64x2_t { + static_assert_imm1!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_f64<const N: i32>(a: float64x1_t) -> float64x2_t { + static_assert!(N : i32 where N == 0); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x1_t { + static_assert!(N : i32 where N == 0); + a +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_f64<const N: i32>(a: float64x1_t) -> float64x1_t { + static_assert!(N : i32 where N == 0); + a +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t { + static_assert_imm1!(N); + transmute::<u64, _>(simd_extract(a, N as u32)) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t { + static_assert_imm1!(N); + transmute::<f64, _>(simd_extract(a, N as u32)) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 { + static_assert_imm3!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 { + static_assert_imm4!(N); + 
simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 { + static_assert_imm2!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 { + static_assert_imm3!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdups_lane_s32<const N: i32>(a: int32x2_t) -> i32 { + static_assert_imm1!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 { + static_assert_imm2!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 { + static_assert!(N : i32 where N == 0); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupd_laneq_s64<const N: i32>(a: int64x2_t) -> i64 { + static_assert_imm1!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 { + static_assert_imm3!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupb_laneq_u8<const N: i32>(a: uint8x16_t) -> u8 { + static_assert_imm4!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vduph_lane_u16<const N: i32>(a: uint16x4_t) -> u16 { + static_assert_imm2!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vduph_laneq_u16<const N: i32>(a: uint16x8_t) -> u16 { + static_assert_imm3!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdups_lane_u32<const N: i32>(a: uint32x2_t) -> u32 { + static_assert_imm1!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdups_laneq_u32<const N: i32>(a: uint32x4_t) -> u32 
{ + static_assert_imm2!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 { + static_assert!(N : i32 where N == 0); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 { + static_assert_imm1!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupb_lane_p8<const N: i32>(a: poly8x8_t) -> p8 { + static_assert_imm3!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 { + static_assert_imm4!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vduph_lane_p16<const N: i32>(a: poly16x4_t) -> p16 { + static_assert_imm2!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vduph_laneq_p16<const N: i32>(a: poly16x8_t) -> p16 { + static_assert_imm3!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdups_lane_f32<const N: i32>(a: float32x2_t) -> f32 { + static_assert_imm1!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 { + static_assert_imm2!(N); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupd_lane_f64<const N: i32>(a: float64x1_t) -> f64 { + static_assert!(N : i32 where N == 0); + simd_extract(a, N as u32) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupd_laneq_f64<const N: i32>(a: float64x2_t) -> f64 { + static_assert_imm1!(N); + simd_extract(a, N as u32) +} + /// Extract vector from pair of vectors #[inline] #[target_feature(enable = "neon")] @@ -6025,6 +6345,262 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_p64() { + let a: i64x2 = i64x2::new(1, 1); + let e: i64x2 = i64x2::new(1, 1); + let r: i64x2 = transmute(vdupq_laneq_p64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_p64() { + let a: i64x1 = i64x1::new(1); + let e: i64x2 = i64x2::new(1, 
1); + let r: i64x2 = transmute(vdupq_lane_p64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_f64() { + let a: f64x2 = f64x2::new(1., 1.); + let e: f64x2 = f64x2::new(1., 1.); + let r: f64x2 = transmute(vdupq_laneq_f64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_f64() { + let a: f64 = 1.; + let e: f64x2 = f64x2::new(1., 1.); + let r: f64x2 = transmute(vdupq_lane_f64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_p64() { + let a: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vdup_lane_p64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_f64() { + let a: f64 = 0.; + let e: f64 = 0.; + let r: f64 = transmute(vdup_lane_f64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_p64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vdup_laneq_p64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_f64() { + let a: f64x2 = f64x2::new(0., 1.); + let e: f64 = 1.; + let r: f64 = transmute(vdup_laneq_f64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupb_lane_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8 = 1; + let r: i8 = transmute(vdupb_lane_s8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupb_laneq_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8 = 1; + let r: i8 = transmute(vdupb_laneq_s8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vduph_lane_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16 = 1; + let r: i16 = transmute(vduph_lane_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vduph_laneq_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16 = 1; + let r: i16 = transmute(vduph_laneq_s16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdups_lane_s32() { + let a: i32x2 = i32x2::new(1, 1); + let e: i32 = 1; + let r: i32 = transmute(vdups_lane_s32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdups_laneq_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 4); + let e: i32 = 1; + let r: i32 = transmute(vdups_laneq_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupd_lane_s64() { + let a: i64x1 = i64x1::new(1); + let e: i64 = 1; + let r: i64 = transmute(vdupd_lane_s64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupd_laneq_s64() { + let a: i64x2 = i64x2::new(1, 1); + let e: i64 = 1; + let r: i64 = transmute(vdupd_laneq_s64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupb_lane_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u8 = 1; + let r: u8 = transmute(vdupb_lane_u8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupb_laneq_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: u8 = 1; + let r: u8 = 
transmute(vdupb_laneq_u8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vduph_lane_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 4); + let e: u16 = 1; + let r: u16 = transmute(vduph_lane_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vduph_laneq_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u16 = 1; + let r: u16 = transmute(vduph_laneq_u16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdups_lane_u32() { + let a: u32x2 = u32x2::new(1, 1); + let e: u32 = 1; + let r: u32 = transmute(vdups_lane_u32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdups_laneq_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 4); + let e: u32 = 1; + let r: u32 = transmute(vdups_laneq_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupd_lane_u64() { + let a: u64x1 = u64x1::new(1); + let e: u64 = 1; + let r: u64 = transmute(vdupd_lane_u64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupd_laneq_u64() { + let a: u64x2 = u64x2::new(1, 1); + let e: u64 = 1; + let r: u64 = transmute(vdupd_laneq_u64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupb_lane_p8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: p8 = 1; + let r: p8 = transmute(vdupb_lane_p8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupb_laneq_p8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: p8 = 1; + let r: p8 = transmute(vdupb_laneq_p8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vduph_lane_p16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: p16 = 1; + let r: p16 = transmute(vduph_lane_p16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vduph_laneq_p16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: p16 = 1; + let r: p16 = transmute(vduph_laneq_p16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdups_lane_f32() { + let a: f32x2 = f32x2::new(1., 1.); + let e: f32 = 1.; + let r: f32 = transmute(vdups_lane_f32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdups_laneq_f32() { + let a: f32x4 = f32x4::new(1., 1., 1., 4.); + let e: f32 = 1.; + let r: f32 = transmute(vdups_laneq_f32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupd_lane_f64() { + let a: f64 = 1.; + let e: f64 = 1.; + let r: f64 = transmute(vdupd_lane_f64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupd_laneq_f64() { + let a: f64x2 = f64x2::new(1., 1.); + let e: f64 = 1.; + let r: f64 = transmute(vdupd_laneq_f64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vextq_p64() { let a: i64x2 = i64x2::new(0, 8); let b: i64x2 = i64x2::new(9, 11); diff --git a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs index 7a8c294e78d..c7fdb80f92e 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs @@ -2044,6 
+2044,534 @@ pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { simd_cast(a) } +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t { + static_assert_imm4!(N); + simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t { + static_assert_imm2!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t { + static_assert_imm3!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t { + static_assert_imm1!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t { + static_assert_imm2!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N 
= 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t { + static_assert_imm4!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t { + static_assert_imm3!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_s32<const N: i32>(a: int32x4_t) -> int32x2_t { + static_assert_imm2!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t { + static_assert_imm3!(N); + simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t { + static_assert_imm2!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t { + static_assert_imm1!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as 
u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t { + static_assert_imm4!(N); + simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t { + static_assert_imm2!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t { + static_assert_imm3!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t { + static_assert_imm1!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t { + static_assert_imm4!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t { + static_assert_imm3!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x2_t { + static_assert_imm2!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t { + static_assert_imm3!(N); + simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t { + static_assert_imm2!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t { + static_assert_imm1!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t { + static_assert_imm3!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t { + static_assert_imm4!(N); + simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as 
u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t { + static_assert_imm2!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t { + static_assert_imm3!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t { + static_assert_imm4!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t { + static_assert_imm3!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t { + static_assert_imm3!(N); + simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t { + static_assert_imm2!(N); + simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_s64<const N: i32>(a: int64x2_t) -> int64x2_t { + static_assert_imm1!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_s64<const N: i32>(a: int64x1_t) -> int64x2_t { + static_assert!(N : i32 where N == 0); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t { + static_assert_imm1!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x2_t { + static_assert!(N : i32 where N == 0); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_f32<const N: i32>(a: float32x2_t) -> float32x2_t { + static_assert_imm1!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_laneq_f32<const N: i32>(a: float32x4_t) -> float32x4_t { + static_assert_imm2!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_f32<const N: i32>(a: float32x4_t) -> float32x2_t { + static_assert_imm2!(N); + simd_shuffle2(a, a, [N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t { + static_assert_imm1!(N); + simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_s64<const N: i32>(a: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where N == 0); + a +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N == 0); + a +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t { + static_assert_imm1!(N); + transmute::<i64, _>(simd_extract(a, N as u32)) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vdup_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x1_t { + static_assert_imm1!(N); + transmute::<u64, _>(simd_extract(a, N as u32)) +} + /// Extract vector from pair of vectors #[inline] #[target_feature(enable = "neon")] @@ -10378,6 +10906,358 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_lane_s8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_laneq_s8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_lane_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_laneq_s16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_s32() { + let a: i32x2 = i32x2::new(1, 1); + let e: i32x2 = i32x2::new(1, 1); + let r: i32x2 = 
transmute(vdup_lane_s32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 4); + let e: i32x4 = i32x4::new(1, 1, 1, 1); + let r: i32x4 = transmute(vdupq_laneq_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_laneq_s8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_laneq_s16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 4); + let e: i32x2 = i32x2::new(1, 1); + let r: i32x2 = transmute(vdup_laneq_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_lane_s8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_lane_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_s32() { + let a: i32x2 = i32x2::new(1, 1); + let e: i32x4 = i32x4::new(1, 1, 1, 1); + let r: i32x4 = transmute(vdupq_lane_s32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x8 = transmute(vdup_lane_u8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x16 = transmute(vdupq_laneq_u8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 4); + let e: u16x4 = u16x4::new(1, 1, 1, 1); + let r: u16x4 = transmute(vdup_lane_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u16x8 = transmute(vdupq_laneq_u16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_u32() { + let a: u32x2 = u32x2::new(1, 1); + let e: u32x2 = u32x2::new(1, 1); + let r: u32x2 = transmute(vdup_lane_u32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 4); + let e: u32x4 = u32x4::new(1, 1, 1, 1); + let r: u32x4 = transmute(vdupq_laneq_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: 
u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x8 = transmute(vdup_laneq_u8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u16x4 = u16x4::new(1, 1, 1, 1); + let r: u16x4 = transmute(vdup_laneq_u16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 4); + let e: u32x2 = u32x2::new(1, 1); + let r: u32x2 = transmute(vdup_laneq_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x16 = transmute(vdupq_lane_u8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 4); + let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u16x8 = transmute(vdupq_lane_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_u32() { + let a: u32x2 = u32x2::new(1, 1); + let e: u32x4 = u32x4::new(1, 1, 1, 1); + let r: u32x4 = transmute(vdupq_lane_u32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_p8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_lane_p8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_p8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_laneq_p8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_p16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_lane_p16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_p16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_laneq_p16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_p8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_laneq_p8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_p16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_laneq_p16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_p8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_lane_p8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_p16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_lane_p16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = 
"neon")] + unsafe fn test_vdupq_laneq_s64() { + let a: i64x2 = i64x2::new(1, 1); + let e: i64x2 = i64x2::new(1, 1); + let r: i64x2 = transmute(vdupq_laneq_s64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_s64() { + let a: i64x1 = i64x1::new(1); + let e: i64x2 = i64x2::new(1, 1); + let r: i64x2 = transmute(vdupq_lane_s64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_u64() { + let a: u64x2 = u64x2::new(1, 1); + let e: u64x2 = u64x2::new(1, 1); + let r: u64x2 = transmute(vdupq_laneq_u64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_u64() { + let a: u64x1 = u64x1::new(1); + let e: u64x2 = u64x2::new(1, 1); + let r: u64x2 = transmute(vdupq_lane_u64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_f32() { + let a: f32x2 = f32x2::new(1., 1.); + let e: f32x2 = f32x2::new(1., 1.); + let r: f32x2 = transmute(vdup_lane_f32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_f32() { + let a: f32x4 = f32x4::new(1., 1., 1., 4.); + let e: f32x4 = f32x4::new(1., 1., 1., 1.); + let r: f32x4 = transmute(vdupq_laneq_f32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_f32() { + let a: f32x4 = f32x4::new(1., 1., 1., 4.); + let e: f32x2 = f32x2::new(1., 1.); + let r: f32x2 = transmute(vdup_laneq_f32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_f32() { + let a: f32x2 = f32x2::new(1., 1.); + let e: f32x4 = f32x4::new(1., 1., 1., 1.); + let r: f32x4 = transmute(vdupq_lane_f32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_s64() { + let a: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vdup_lane_s64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_u64() { + let a: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vdup_lane_u64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vdup_laneq_s64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_u64() { + let a: u64x2 = u64x2::new(0, 1); + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vdup_laneq_u64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vext_s8() { let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index 460b236bd7e..7e86ed447b0 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -890,6 +890,148 @@ aarch64 = fcvtpu link-aarch64 = fcvtpu._EXT2_._EXT_ generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +/// Set all vector lanes to the same value +name = vdup +lane-suffixes +constn = N +multi_fn = static_assert_imm-in_exp_len-N +multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32} +a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 
13, 14, 15, 16 +n = HFLEN +validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + +aarch64 = dup +generate poly64x2_t, poly64x1_t:poly64x2_t + +arm = vdup.l +generate int*_t +generate int8x16_t:int8x8_t, int16x8_t:int16x4_t, int32x4_t:int32x2_t +generate int8x8_t:int8x16_t, int16x4_t:int16x8_t, int32x2_t:int32x4_t + +generate uint*_t +generate uint8x16_t:uint8x8_t, uint16x8_t:uint16x4_t, uint32x4_t:uint32x2_t +generate uint8x8_t:uint8x16_t, uint16x4_t:uint16x8_t, uint32x2_t:uint32x4_t + +generate poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t +generate poly8x16_t:poly8x8_t, poly16x8_t:poly16x4_t +generate poly8x8_t:poly8x16_t, poly16x4_t:poly16x8_t + +/// Set all vector lanes to the same value +name = vdup +lane-suffixes +constn = N +multi_fn = static_assert_imm-in_exp_len-N +multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32} +a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16 +n = HFLEN +validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + +aarch64 = dup +arm = vmov +generate int64x2_t, int64x1_t:int64x2_t, uint64x2_t, uint64x1_t:uint64x2_t + +/// Set all vector lanes to the same value +name = vdup +lane-suffixes +constn = N +multi_fn = static_assert_imm-in_exp_len-N +multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32} +a = 1., 1., 1., 4. +n = HFLEN +validate 1., 1., 1., 1. + +aarch64 = dup +generate float64x2_t, float64x1_t:float64x2_t + +arm = vdup.l +generate float*_t, float32x4_t:float32x2_t, float32x2_t:float32x4_t + +/// Set all vector lanes to the same value +name = vdup +lane-suffixes +constn = N +multi_fn = static_assert_imm-in_exp_len-N +multi_fn = a +a = 0 +n = HFLEN +validate 0 + +aarch64 = nop +generate poly64x1_t + +arm = nop +generate int64x1_t, uint64x1_t + +/// Set all vector lanes to the same value +name = vdup +lane-suffixes +constn = N +multi_fn = static_assert_imm-in_exp_len-N +multi_fn = a +a = 0. +n = HFLEN +validate 0. + +aarch64 = nop +generate float64x1_t + +/// Set all vector lanes to the same value +name = vdup +lane-suffixes +constn = N +multi_fn = static_assert_imm-in_exp_len-N +multi_fn = transmute--<element_t _>, {simd_extract, a, N as u32} +a = 0, 1 +n = HFLEN +validate 1 + +aarch64 = nop +generate poly64x2_t:poly64x1_t + +arm = vmov +generate int64x2_t:int64x1_t, uint64x2_t:uint64x1_t + +/// Set all vector lanes to the same value +name = vdup +lane-suffixes +constn = N +multi_fn = static_assert_imm-in_exp_len-N +multi_fn = transmute--<element_t _>, {simd_extract, a, N as u32} +a = 0., 1. +n = HFLEN +validate 1. + +aarch64 = nop +generate float64x2_t:float64x1_t + +/// Set all vector lanes to the same value +name = vdup +lane-suffixes +constn = N +multi_fn = static_assert_imm-in_exp_len-N +multi_fn = simd_extract, a, N as u32 +a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16 +n = HFLEN +validate 1 + +aarch64 = nop +generate int8x8_t:i8, int8x16_t:i8, int16x4_t:i16, int16x8_t:i16, int32x2_t:i32, int32x4_t:i32, int64x1_t:i64, int64x2_t:i64 +generate uint8x8_t:u8, uint8x16_t:u8, uint16x4_t:u16, uint16x8_t:u16, uint32x2_t:u32, uint32x4_t:u32, uint64x1_t:u64, uint64x2_t:u64 +generate poly8x8_t:p8, poly8x16_t:p8, poly16x4_t:p16, poly16x8_t:p16 + +/// Set all vector lanes to the same value +name = vdup +lane-suffixes +constn = N +multi_fn = static_assert_imm-in_exp_len-N +multi_fn = simd_extract, a, N as u32 +a = 1., 1., 1., 4. +n = HFLEN +validate 1. 
+ +aarch64 = nop +generate float32x2_t:f32, float32x4_t:f32, float64x1_t:f64, float64x2_t:f64 + /// Extract vector from pair of vectors name = vext constn = N diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs index 45e49eb3d24..418347461c5 100644 --- a/library/stdarch/crates/stdarch-gen/src/main.rs +++ b/library/stdarch/crates/stdarch-gen/src/main.rs @@ -80,6 +80,8 @@ fn type_len(t: &str) -> usize { "poly16x8_t" => 8, "poly64x1_t" => 1, "poly64x2_t" => 2, + "i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "p8" + | "p16" => 1, _ => panic!("unknown type: {}", t), } } @@ -168,6 +170,18 @@ fn type_to_suffix(t: &str) -> &str { "poly16x8_t" => "q_p16", "poly64x1_t" => "_p64", "poly64x2_t" => "q_p64", + "i8" => "b_s8", + "i16" => "h_s16", + "i32" => "s_s32", + "i64" => "d_s64", + "u8" => "b_u8", + "u16" => "h_u16", + "u32" => "s_u32", + "u64" => "d_u64", + "f32" => "s_f32", + "f64" => "d_f64", + "p8" => "b_p8", + "p16" => "h_p16", _ => panic!("unknown type: {}", t), } } @@ -206,6 +220,17 @@ fn type_to_n_suffix(t: &str) -> &str { } } +fn type_to_lane_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String { + let mut str = String::new(); + let suf = type_to_suffix(out_t); + if !suf.starts_with("_") { + str.push_str(&suf[0..1]); + } + str.push_str("_lane"); + str.push_str(type_to_suffix(in_t)); + str +} + fn type_to_signed_suffix(t: &str) -> &str { match t { "int8x8_t" | "uint8x8_t" | "poly8x8_t" => "_s8", @@ -299,6 +324,7 @@ enum Suffix { NoQDouble, NSuffix, OutSuffix, + Lane, } #[derive(Clone, Copy)] @@ -337,39 +363,51 @@ fn type_to_global_type(t: &str) -> &str { "poly16x8_t" => "i16x8", "poly64x1_t" => "i64x1", "poly64x2_t" => "i64x2", + "i8" => "i8", + "i16" => "i16", + "i32" => "i32", + "i64" => "i64", + "u8" => "u8", + "u16" => "u16", + "u32" => "u32", + "u64" => "u64", + "f32" => "f32", + "f64" => "f64", + "p8" => "p8", + "p16" => "p16", _ => panic!("unknown type: {}", t), } } -// fn type_to_native_type(t: &str) -> &str { -// match t { -// "int8x8_t" => "i8", -// "int8x16_t" => "i8", -// "int16x4_t" => "i16", -// "int16x8_t" => "i16", -// "int32x2_t" => "i32", -// "int32x4_t" => "i32", -// "int64x1_t" => "i64", -// "int64x2_t" => "i64", -// "uint8x8_t" => "u8", -// "uint8x16_t" => "u8", -// "uint16x4_t" => "u16", -// "uint16x8_t" => "u16", -// "uint32x2_t" => "u32", -// "uint32x4_t" => "u32", -// "uint64x1_t" => "u64", -// "uint64x2_t" => "u64", -// "float16x4_t" => "f16", -// "float16x8_t" => "f16", -// "float32x2_t" => "f32", -// "float32x4_t" => "f32", -// "float64x1_t" => "f64", -// "float64x2_t" => "f64", -// "poly64x1_t" => "i64", -// "poly64x2_t" => "i64", -// _ => panic!("unknown type: {}", t), -// } -// } +fn type_to_native_type(t: &str) -> &str { + match t { + "int8x8_t" => "i8", + "int8x16_t" => "i8", + "int16x4_t" => "i16", + "int16x8_t" => "i16", + "int32x2_t" => "i32", + "int32x4_t" => "i32", + "int64x1_t" => "i64", + "int64x2_t" => "i64", + "uint8x8_t" => "u8", + "uint8x16_t" => "u8", + "uint16x4_t" => "u16", + "uint16x8_t" => "u16", + "uint32x2_t" => "u32", + "uint32x4_t" => "u32", + "uint64x1_t" => "u64", + "uint64x2_t" => "u64", + "float16x4_t" => "f16", + "float16x8_t" => "f16", + "float32x2_t" => "f32", + "float32x4_t" => "f32", + "float64x1_t" => "f64", + "float64x2_t" => "f64", + "poly64x1_t" => "u64", + "poly64x2_t" => "u64", + _ => panic!("unknown type: {}", t), + } +} fn type_to_ext(t: &str) -> &str { match t { @@ -731,6 +769,7 @@ fn gen_aarch64( ), NSuffix => format!("{}{}", 
current_name, type_to_n_suffix(in_t[1])), OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)), + Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])), }; let current_fn = if let Some(current_fn) = current_fn.clone() { if link_aarch64.is_some() { @@ -1048,6 +1087,7 @@ fn gen_arm( ), NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])), OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)), + Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])), }; let current_aarch64 = current_aarch64 .clone() @@ -1325,6 +1365,39 @@ fn expand_intrinsic(intr: &str, t: &str) -> String { _ => panic!("unknown type for extension: {}", t), }; format!(r#""{}{}""#, &intr[..intr.len() - 1], ext) + } else if intr.ends_with(".l") { + let ext = match t { + "int8x8_t" => "8", + "int8x16_t" => "8", + "int16x4_t" => "16", + "int16x8_t" => "16", + "int32x2_t" => "32", + "int32x4_t" => "32", + "int64x1_t" => "64", + "int64x2_t" => "64", + "uint8x8_t" => "8", + "uint8x16_t" => "8", + "uint16x4_t" => "16", + "uint16x8_t" => "16", + "uint32x2_t" => "32", + "uint32x4_t" => "32", + "uint64x1_t" => "64", + "uint64x2_t" => "64", + "poly8x8_t" => "8", + "poly8x16_t" => "8", + "poly16x4_t" => "16", + "poly16x8_t" => "16", + "float16x4_t" => "16", + "float16x8_t" => "16", + "float32x2_t" => "32", + "float32x4_t" => "32", + "float64x1_t" => "64", + "float64x2_t" => "64", + "poly64x1_t" => "64", + "poly64x2_t" => "64", + _ => panic!("unknown type for extension: {}", t), + }; + format!(r#""{}{}""#, &intr[..intr.len() - 1], ext) } else { intr.to_string() } @@ -1341,6 +1414,9 @@ fn get_call( let params: Vec<_> = in_str.split(',').map(|v| v.trim().to_string()).collect(); assert!(params.len() > 0); let mut fn_name = params[0].clone(); + if fn_name == "a" { + return String::from("a"); + } if fn_name == "transpose-1-in_len" { return transpose1(type_len(in_t[1])).to_string(); } @@ -1353,18 +1429,36 @@ fn get_call( if fn_name == "zip-2-in_len" { return zip2(type_len(in_t[1])).to_string(); } + if fn_name.starts_with("dup") { + let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); + let len = match &*fn_format[1] { + "out_len" => type_len(out_t), + "in_len" => type_len(in_t[1]), + "halflen" => type_len(in_t[1]) / 2, + _ => 0, + }; + let mut s = String::from("["); + for i in 0..len { + if i != 0 { + s.push_str(", "); + } + s.push_str(&fn_format[2]); + } + s.push_str("]"); + return s; + } if fn_name.starts_with("asc") { let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); let start = match &*fn_format[1] { "0" => 0, "n" => n.unwrap(), - "halflen" => type_half_len_str(in_t[1]).parse::<i32>().unwrap(), + "halflen" => (type_len(in_t[1]) / 2) as i32, s => s.parse::<i32>().unwrap(), }; let len = match &*fn_format[2] { "out_len" => type_len(out_t), "in_len" => type_len(in_t[1]), - "halflen" => type_half_len_str(in_t[1]).parse::<usize>().unwrap(), + "halflen" => type_len(in_t[1]) / 2, _ => 0, }; return asc(start, len); @@ -1378,7 +1472,14 @@ fn get_call( "in_bits_exp_len" => type_bits_exp_len(in_t[1]), _ => 0, }; - return format!(r#"static_assert_imm{}!({});"#, len, fn_format[2]); + if len == 0 { + return format!( + r#"static_assert!({} : i32 where {} == 0);"#, + fn_format[2], fn_format[2] + ); + } else { + return format!(r#"static_assert_imm{}!({});"#, len, fn_format[2]); + } } if fn_name.starts_with("static_assert") { let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); @@ -1396,10 +1497,17 @@ fn 
get_call( } else { fn_format[3].clone() }; - return format!( - r#"static_assert!({} : i32 where {} >= {} && {} <= {});"#, - fn_format[1], fn_format[1], lim1, fn_format[1], lim2 - ); + if lim1 == lim2 { + return format!( + r#"static_assert!({} : i32 where {} == {});"#, + fn_format[1], fn_format[1], lim1 + ); + } else { + return format!( + r#"static_assert!({} : i32 where {} >= {} && {} <= {});"#, + fn_format[1], fn_format[1], lim1, fn_format[1], lim2 + ); + } } if fn_name.starts_with("matchn") { let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); @@ -1531,6 +1639,8 @@ fn get_call( fn_name.push_str(&type_len(in_t[1]).to_string()); } else if fn_format[1] == "out_len" { fn_name.push_str(&type_len(out_t).to_string()); + } else if fn_format[1] == "halflen" { + fn_name.push_str(&(type_len(in_t[1]) / 2).to_string()); } else if fn_format[1] == "nout" { fn_name.push_str(type_to_n_suffix(out_t)); } else { @@ -1539,6 +1649,24 @@ fn get_call( if fn_format[2] == "ext" { fn_name.push_str("_"); } else if fn_format[2] == "noext" { + } else if fn_format[2].starts_with("<") { + assert!(fn_format[2].ends_with(">")); + let types: Vec<_> = fn_format[2][1..fn_format[2].len() - 1] + .split(' ') + .map(|v| v.to_string()) + .collect(); + assert_eq!(types.len(), 2); + let type1 = if types[0] == "element_t" { + type_to_native_type(in_t[1]) + } else { + &types[0] + }; + let type2 = if types[1] == "element_t" { + type_to_native_type(in_t[1]) + } else { + &types[1] + }; + fn_name.push_str(&format!("::<{}, {}>", type1, type2)); } else { fn_name.push_str(&fn_format[2]); } @@ -1690,6 +1818,8 @@ mod test { suffix = NSuffix; } else if line.starts_with("out-suffix") { suffix = OutSuffix; + } else if line.starts_with("lane-suffixes") { + suffix = Lane; } else if line.starts_with("a = ") { a = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("b = ") { diff --git a/library/stdarch/crates/stdarch-test/src/lib.rs b/library/stdarch/crates/stdarch-test/src/lib.rs index 0275b8d16dc..70797e17c81 100644 --- a/library/stdarch/crates/stdarch-test/src/lib.rs +++ b/library/stdarch/crates/stdarch-test/src/lib.rs @@ -76,8 +76,12 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { instrs = &instrs[..instrs.len() - 1]; } - // If the expected intrinsic is a nop it is compiled away so we - // can't check for it - aka the intrinsic is not generating any code + // There are two cases when the expected instruction is nop: + // 1. The expected intrinsic is compiled away so we can't + // check for it - aka the intrinsic is not generating any code. + // 2. It is a mark, indicating that the instruction will be + // compiled into other instructions - mainly because of llvm + // optimization. if expected == "nop" { return; } |
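
For readers following the generator changes above, the sketch below restates two small pieces of the new logic as standalone Rust, so the neon.spec syntax is easier to trace. This is illustrative only and not part of the patch: the free functions lane_suffix and dup_indices are hypothetical names; in stdarch-gen the corresponding logic lives in type_to_lane_suffixes and inline in get_call.

// Illustrative sketch only (not part of the patch).

// Mirrors type_to_lane_suffixes: take the leading size letter of the scalar
// output suffix (e.g. "h" from "h_s16"), then "_lane", then the suffix of
// the vector input type.
fn lane_suffix(out_suffix: &str, in_suffix: &str) -> String {
    let mut s = String::new();
    if !out_suffix.starts_with('_') {
        s.push_str(&out_suffix[0..1]);
    }
    s.push_str("_lane");
    s.push_str(in_suffix);
    s
}

// Mirrors the new `dup-out_len-N as u32` expansion in get_call: repeat the
// given expression once per output lane to build a shuffle index list.
fn dup_indices(out_len: usize, expr: &str) -> String {
    let mut s = String::from("[");
    for i in 0..out_len {
        if i != 0 {
            s.push_str(", ");
        }
        s.push_str(expr);
    }
    s.push(']');
    s
}

fn main() {
    // "vdup" plus this suffix yields a name like vduph_lane_s16 for an i16
    // scalar extracted from an int16x4_t input.
    assert_eq!(lane_suffix("h_s16", "_s16"), "h_lane_s16");

    // For a 4-lane output, the spec placeholder expands to the index list
    // handed to simd_shuffle4.
    assert_eq!(
        dup_indices(4, "N as u32"),
        "[N as u32, N as u32, N as u32, N as u32]"
    );
}

Under these assumptions, the spec line `multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32}` expands to a simd_shuffle call whose index array repeats N once per output lane, which is exactly the shape of the generated vdup lane intrinsics in the diff above.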
