diff options
| author | Sparrow Li <liyuan179@huawei.com> | 2021-03-17 17:34:21 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-03-17 09:34:21 +0000 |
| commit | 4773f9b1d26194210eb8885369df9c43089795be (patch) | |
| tree | f00df2cd88ddc50c5505fda59e24f75c5305f4df | |
| parent | a1e151e8389c983196ae62027174151861f980f7 (diff) | |
| download | rust-4773f9b1d26194210eb8885369df9c43089795be.tar.gz rust-4773f9b1d26194210eb8885369df9c43089795be.zip | |
Support three parameters in the code generator and add vmla and vmls instructions (#1088)
| -rw-r--r-- | library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs | 72 | ||||
| -rw-r--r-- | library/stdarch/crates/core_arch/src/arm/neon/generated.rs | 560 | ||||
| -rw-r--r-- | library/stdarch/crates/stdarch-gen/neon.spec | 52 | ||||
| -rw-r--r-- | library/stdarch/crates/stdarch-gen/src/main.rs | 292 |
4 files changed, 864 insertions, 112 deletions
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 82b4e8fc2a0..6ede0275b4e 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -1601,6 +1601,38 @@ pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { vcvtpq_u64_f64_(a) } +/// Floating-point multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul))] +pub unsafe fn vmla_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { + simd_add(a, simd_mul(b, c)) +} + +/// Floating-point multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul))] +pub unsafe fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + simd_add(a, simd_mul(b, c)) +} + +/// Floating-point multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul))] +pub unsafe fn vmls_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Floating-point multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul))] +pub unsafe fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + simd_sub(a, simd_mul(b, c)) +} + /// Multiply #[inline] #[target_feature(enable = "neon")] @@ -3207,6 +3239,46 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vmla_f64() { + let a: f64 = 0.; + let b: f64 = 2.; + let c: f64 = 3.; + let e: f64 = 6.; + let r: f64 = transmute(vmla_f64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_f64() { + let a: f64x2 = f64x2::new(0., 1.); + let b: f64x2 = f64x2::new(2., 2.); + let c: f64x2 = f64x2::new(3., 3.); + let e: f64x2 = f64x2::new(6., 7.); + let r: f64x2 = transmute(vmlaq_f64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_f64() { + let a: f64 = 6.; + let b: f64 = 2.; + let c: f64 = 3.; + let e: f64 = 0.; + let r: f64 = transmute(vmls_f64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_f64() { + let a: f64x2 = f64x2::new(6., 7.); + let b: f64x2 = f64x2::new(2., 2.); + let c: f64x2 = f64x2::new(3., 3.); + let e: f64x2 = f64x2::new(0., 1.); + let r: f64x2 = transmute(vmlsq_f64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vmul_f64() { let a: f64 = 1.0; let b: f64 = 2.0; diff --git a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs index e395d511ea2..f2100b1975e 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs @@ -1981,6 +1981,286 @@ pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { simd_cast(a) } +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +pub unsafe fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Floating-point multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + simd_add(a, simd_mul(b, c)) +} + +/// Floating-point multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Floating-point multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Floating-point multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + simd_sub(a, simd_mul(b, c)) +} + /// Saturating subtract #[inline] #[target_feature(enable = "neon")] @@ -5558,6 +5838,286 @@ mod test { } #[simd_test(enable = "neon")] + unsafe fn test_vmla_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i8x8 = transmute(vmla_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + let e: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + let r: i8x16 = transmute(vmlaq_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(3, 3, 3, 3); + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(3, 3); + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(3, 3, 3, 3); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u8x8 = transmute(vmla_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + let e: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + let r: u8x16 = transmute(vmlaq_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(3, 3, 3, 3); + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(3, 3); + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(3, 3, 3, 3); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x2 = f32x2::new(3., 3.); + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_f32() { + let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x4 = f32x4::new(3., 3., 3., 3.); + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_s8() { + let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i8x8 = transmute(vmls_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_s8() { + let a: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r: i8x16 = transmute(vmlsq_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(3, 3, 3, 3); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_s32() { + let a: i32x2 = i32x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(3, 3); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(3, 3, 3, 3); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_u8() { + let a: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u8x8 = transmute(vmls_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_u8() { + let a: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r: u8x16 = transmute(vmlsq_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(3, 3, 3, 3); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_u32() { + let a: u32x2 = u32x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(3, 3); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(3, 3, 3, 3); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_f32() { + let a: f32x2 = f32x2::new(6., 7.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x2 = f32x2::new(3., 3.); + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x4 = f32x4::new(3., 3., 3., 3.); + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] unsafe fn test_vqsub_u8() { let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index a30c823186f..e1c34bd32cc 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -758,6 +758,58 @@ aarch64 = fcvtpu link-aarch64 = fcvtpu._EXT2_._EXT_ generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +/// Multiply-add to accumulator +name = vmla +multi_fn = simd_add, a, {simd_mul, b, c} +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 + +arm = vmla. +aarch64 = mla +generate int*_t, uint*_t + +/// Floating-point multiply-add to accumulator +name = vmla +multi_fn = simd_add, a, {simd_mul, b, c} +a = 0., 1., 2., 3. +b = 2., 2., 2., 2. +c = 3., 3., 3., 3. +validate 6., 7., 8., 9. + +aarch64 = fmul +generate float64x*_t + +arm = vmla. +generate float*_t + +/// Multiply-subtract from accumulator +name = vmls +multi_fn = simd_sub, a, {simd_mul, b, c} +a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + +arm = vmls. +aarch64 = mls +generate int*_t, uint*_t + +/// Floating-point multiply-subtract from accumulator +name = vmls +multi_fn = simd_sub, a, {simd_mul, b, c} +a = 6., 7., 8., 9. +b = 2., 2., 2., 2. +c = 3., 3., 3., 3. +validate 0., 1., 2., 3. + +aarch64 = fmul +generate float64x*_t + +arm = vmls. +generate float*_t + /// Saturating subtract name = vqsub a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs index fd4eedf8c0d..59073d1f471 100644 --- a/library/stdarch/crates/stdarch-gen/src/main.rs +++ b/library/stdarch/crates/stdarch-gen/src/main.rs @@ -1,3 +1,4 @@ +use self::Suffix::*; use std::env; use std::fs::File; use std::io::prelude::*; @@ -178,6 +179,32 @@ fn type_to_double_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> &'a str { } } +fn type_to_noq_suffix(t: &str) -> &str { + match t { + "int8x8_t" | "int8x16_t" => "_s8", + "int16x4_t" | "int16x8_t" => "_s16", + "int32x2_t" | "int32x4_t" => "_s32", + "int64x1_t" | "int64x2_t" => "_s64", + "uint8x8_t" | "uint8x16_t" => "_u8", + "uint16x4_t" | "uint16x8_t" => "_u16", + "uint32x2_t" | "uint32x4_t" => "_u32", + "uint64x1_t" | "uint64x2_t" => "_u64", + "float16x4_t" | "float16x8_t" => "_f16", + "float32x2_t" | "float32x4_t" => "_f32", + "float64x1_t" | "float64x2_t" => "_f64", + "poly8x8_t" | "poly8x16_t" => "_p8", + "poly64x1_t" | "poly64x2_t" => "_p64", + _ => panic!("unknown type: {}", t), + } +} + +#[derive(Clone, Copy)] +enum Suffix { + Normal, + Double, + NoQ, +} + fn type_to_global_type(t: &str) -> &str { match t { "int8x8_t" => "i8x8", @@ -405,21 +432,24 @@ fn gen_aarch64( current_name: &str, current_aarch64: &Option<String>, link_aarch64: &Option<String>, - in_t: &str, - in_t2: &str, + in_t: &[&str; 3], out_t: &str, - current_tests: &[(Vec<String>, Vec<String>, Vec<String>)], - double_suffixes: bool, + current_tests: &[(Vec<String>, Vec<String>, Vec<String>, Vec<String>)], + suffix: Suffix, para_num: i32, fixed: &Vec<String>, multi_fn: &Vec<String>, ) -> (String, String) { - let _global_t = type_to_global_type(in_t); + let _global_t = type_to_global_type(in_t[0]); let _global_ret_t = type_to_global_type(out_t); - let name = if double_suffixes { - format!("{}{}", current_name, type_to_double_suffixes(out_t, in_t2)) - } else { - format!("{}{}", current_name, type_to_suffix(in_t2)) + let name = match suffix { + Normal => format!("{}{}", current_name, type_to_suffix(in_t[1])), + NoQ => format!("{}{}", current_name, type_to_noq_suffix(in_t[1])), + Double => format!( + "{}{}", + current_name, + type_to_double_suffixes(out_t, in_t[1]) + ), }; let current_fn = if let Some(current_fn) = current_fn.clone() { if link_aarch64.is_some() { @@ -448,7 +478,7 @@ fn gen_aarch64( }; let current_aarch64 = current_aarch64.clone().unwrap(); let ext_c = if let Some(link_aarch64) = link_aarch64.clone() { - let ext = type_to_ext(in_t); + let ext = type_to_ext(in_t[0]); let ext2 = type_to_ext(out_t); format!( r#"#[allow(improper_ctypes)] @@ -461,10 +491,13 @@ fn gen_aarch64( current_fn, match para_num { 1 => { - format!("a: {}", in_t) + format!("a: {}", in_t[0]) } 2 => { - format!("a: {}, b: {}", in_t, in_t2) + format!("a: {}, b: {}", in_t[0], in_t[1]) + } + 3 => { + format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2]) } _ => unimplemented!("unknown para_num"), }, @@ -479,58 +512,63 @@ fn gen_aarch64( if i > 0 { calls.push_str("\n "); } - calls.push_str(&get_call( - &multi_fn[i], - current_name, - in_t, - in_t2, - out_t, - fixed, - )); + calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed)); } calls } else { String::new() }; let call = match (multi_calls.len(), para_num, fixed.len()) { - (0, 2, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ - {}{}(a, b) -}}"#, - name, in_t, in_t2, out_t, ext_c, current_fn, - ), (0, 1, 0) => format!( r#"pub unsafe fn {}(a: {}) -> {} {{ {}{}(a) }}"#, - name, in_t, out_t, ext_c, current_fn, + name, in_t[0], out_t, ext_c, current_fn, ), (0, 1, _) => { - let fixed: Vec<String> = fixed.iter().take(type_len(in_t)).cloned().collect(); + let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect(); format!( r#"pub unsafe fn {}(a: {}) -> {} {{ let b{}; {}{}(a, transmute(b)) }}"#, name, - in_t, + in_t[0], out_t, - values(in_t, &fixed), + values(in_t[0], &fixed), ext_c, current_fn, ) } + (0, 2, _) => format!( + r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ + {}{}(a, b) +}}"#, + name, in_t[0], in_t[1], out_t, ext_c, current_fn, + ), + (0, 3, _) => format!( + r#"pub unsafe fn {}(a: {}, b: {}, c: {}) -> {} {{ + {}{}(a, b, c) +}}"#, + name, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn, + ), (_, 1, _) => format!( r#"pub unsafe fn {}(a: {}) -> {} {{ {}{} }}"#, - name, in_t, out_t, ext_c, multi_calls, + name, in_t[0], out_t, ext_c, multi_calls, ), (_, 2, _) => format!( r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ {}{} }}"#, - name, in_t, in_t2, out_t, ext_c, multi_calls, + name, in_t[0], in_t[1], out_t, ext_c, multi_calls, + ), + (_, 3, _) => format!( + r#"pub unsafe fn {}(a: {}, b: {}, c: {}) -> {} {{ + {}{} +}}"#, + name, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls, ), (_, _, _) => String::new(), }; @@ -547,12 +585,10 @@ fn gen_aarch64( let test = gen_test( &name, - &in_t, - &in_t2, + in_t, &out_t, current_tests, - type_len(in_t), - type_len(in_t2), + [type_len(in_t[0]), type_len(in_t[0]), type_len(in_t[0])], type_len(out_t), para_num, ); @@ -561,12 +597,10 @@ fn gen_aarch64( fn gen_test( name: &str, - in_t: &str, - in_t2: &str, + in_t: &[&str; 3], out_t: &str, - current_tests: &[(Vec<String>, Vec<String>, Vec<String>)], - len_in: usize, - len_in2: usize, + current_tests: &[(Vec<String>, Vec<String>, Vec<String>, Vec<String>)], + len_in: [usize; 3], len_out: usize, para_num: i32, ) -> String { @@ -576,9 +610,10 @@ fn gen_test( unsafe fn test_{}() {{"#, name, ); - for (a, b, e) in current_tests { - let a: Vec<String> = a.iter().take(len_in).cloned().collect(); - let b: Vec<String> = b.iter().take(len_in2).cloned().collect(); + for (a, b, c, e) in current_tests { + let a: Vec<String> = a.iter().take(len_in[0]).cloned().collect(); + let b: Vec<String> = b.iter().take(len_in[1]).cloned().collect(); + let c: Vec<String> = c.iter().take(len_in[2]).cloned().collect(); let e: Vec<String> = e.iter().take(len_out).cloned().collect(); let t = { match para_num { @@ -590,7 +625,7 @@ fn gen_test( let r: {} = transmute({}(transmute(a))); assert_eq!(r, e); "#, - values(in_t, &a), + values(in_t[0], &a), values(out_t, &e), type_to_global_type(out_t), name @@ -605,8 +640,26 @@ fn gen_test( let r: {} = transmute({}(transmute(a), transmute(b))); assert_eq!(r, e); "#, - values(in_t, &a), - values(in_t2, &b), + values(in_t[0], &a), + values(in_t[1], &b), + values(out_t, &e), + type_to_global_type(out_t), + name + ) + } + 3 => { + format!( + r#" + let a{}; + let b{}; + let c{}; + let e{}; + let r: {} = transmute({}(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); +"#, + values(in_t[0], &a), + values(in_t[1], &b), + values(in_t[2], &c), values(out_t, &e), type_to_global_type(out_t), name @@ -633,21 +686,24 @@ fn gen_arm( link_arm: &Option<String>, current_aarch64: &Option<String>, link_aarch64: &Option<String>, - in_t: &str, - in_t2: &str, + in_t: &[&str; 3], out_t: &str, - current_tests: &[(Vec<String>, Vec<String>, Vec<String>)], - double_suffixes: bool, + current_tests: &[(Vec<String>, Vec<String>, Vec<String>, Vec<String>)], + suffix: Suffix, para_num: i32, fixed: &Vec<String>, multi_fn: &Vec<String>, ) -> (String, String) { - let _global_t = type_to_global_type(in_t); + let _global_t = type_to_global_type(in_t[0]); let _global_ret_t = type_to_global_type(out_t); - let name = if double_suffixes { - format!("{}{}", current_name, type_to_double_suffixes(out_t, in_t2)) - } else { - format!("{}{}", current_name, type_to_suffix(in_t2)) + let name = match suffix { + Normal => format!("{}{}", current_name, type_to_suffix(in_t[1])), + NoQ => format!("{}{}", current_name, type_to_noq_suffix(in_t[1])), + Double => format!( + "{}{}", + current_name, + type_to_double_suffixes(out_t, in_t[1]) + ), }; let current_aarch64 = current_aarch64 .clone() @@ -680,7 +736,7 @@ fn gen_arm( }; let ext_c = if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) { - let ext = type_to_ext(in_t); + let ext = type_to_ext(in_t[0]); let ext2 = type_to_ext(out_t); format!( r#"#[allow(improper_ctypes)] @@ -695,10 +751,13 @@ fn gen_arm( current_fn, match para_num { 1 => { - format!("a: {}", in_t) + format!("a: {}", in_t[0]) } 2 => { - format!("a: {}, b: {}", in_t, in_t2) + format!("a: {}, b: {}", in_t[0], in_t[1]) + } + 3 => { + format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2]) } _ => unimplemented!("unknown para_num"), }, @@ -713,58 +772,63 @@ fn gen_arm( if i > 0 { calls.push_str("\n "); } - calls.push_str(&get_call( - &multi_fn[i], - current_name, - in_t, - in_t2, - out_t, - fixed, - )); + calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed)); } calls } else { String::new() }; let call = match (multi_calls.len(), para_num, fixed.len()) { - (0, 2, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ - {}{}(a, b) -}}"#, - name, in_t, in_t2, out_t, ext_c, current_fn, - ), (0, 1, 0) => format!( r#"pub unsafe fn {}(a: {}) -> {} {{ {}{}(a) }}"#, - name, in_t, out_t, ext_c, current_fn, + name, in_t[0], out_t, ext_c, current_fn, ), (0, 1, _) => { - let fixed: Vec<String> = fixed.iter().take(type_len(in_t)).cloned().collect(); + let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect(); format!( r#"pub unsafe fn {}(a: {}) -> {} {{ let b{}; {}{}(a, transmute(b)) }}"#, name, - in_t, + in_t[0], out_t, - values(in_t, &fixed), + values(in_t[0], &fixed), ext_c, current_fn, ) } + (0, 2, _) => format!( + r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ + {}{}(a, b) +}}"#, + name, in_t[0], in_t[1], out_t, ext_c, current_fn, + ), + (0, 3, _) => format!( + r#"pub unsafe fn {}(a: {}, b: {}, c: {}) -> {} {{ + {}{}(a, b) +}}"#, + name, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn, + ), (_, 1, _) => format!( r#"pub unsafe fn {}(a: {}) -> {} {{ {}{} }}"#, - name, in_t, out_t, ext_c, multi_calls, + name, in_t[0], out_t, ext_c, multi_calls, ), (_, 2, _) => format!( r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ {}{} }}"#, - name, in_t, in_t2, out_t, ext_c, multi_calls, + name, in_t[0], in_t[1], out_t, ext_c, multi_calls, + ), + (_, 3, _) => format!( + r#"pub unsafe fn {}(a: {}, b: {}, c: {}) -> {} {{ + {}{} +}}"#, + name, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls, ), (_, _, _) => String::new(), }; @@ -779,18 +843,16 @@ fn gen_arm( {} "#, current_comment, - expand_intrinsic(¤t_arm, in_t), - expand_intrinsic(¤t_aarch64, in_t), + expand_intrinsic(¤t_arm, in_t[0]), + expand_intrinsic(¤t_aarch64, in_t[0]), call, ); let test = gen_test( &name, - &in_t, - &in_t2, + in_t, &out_t, current_tests, - type_len(in_t), - type_len(in_t2), + [type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])], type_len(out_t), para_num, ); @@ -871,8 +933,7 @@ fn expand_intrinsic(intr: &str, t: &str) -> String { fn get_call( in_str: &str, current_name: &str, - in_t: &str, - in_t2: &str, + in_t: &[&str; 3], out_t: &str, fixed: &Vec<String>, ) -> String { @@ -907,7 +968,6 @@ fn get_call( &sub_fn[1..sub_fn.len() - 1], current_name, in_t, - in_t2, out_t, fixed, ); @@ -918,9 +978,9 @@ fn get_call( } else if s.contains(':') { let re_params: Vec<_> = s.split(':').map(|v| v.to_string()).collect(); if re_params[1] == "" { - re = Some((re_params[0].clone(), in_t.to_string())); + re = Some((re_params[0].clone(), in_t[0].to_string())); } else if re_params[1] == "in_t" { - re = Some((re_params[0].clone(), in_t.to_string())); + re = Some((re_params[0].clone(), in_t[0].to_string())); } else if re_params[1] == "out_t" { re = Some((re_params[0].clone(), out_t.to_string())); } else { @@ -936,7 +996,7 @@ fn get_call( } if fn_name == "fixed" { let (re_name, re_type) = re.unwrap(); - let fixed: Vec<String> = fixed.iter().take(type_len(in_t)).cloned().collect(); + let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect(); return format!(r#"let {}{};"#, re_name, values(&re_type, &fixed)); } if fn_name.contains('-') { @@ -948,13 +1008,16 @@ fn get_call( fn_format[0].clone() }; if fn_format[1] == "self" { - fn_name.push_str(type_to_suffix(in_t2)); + fn_name.push_str(type_to_suffix(in_t[1])); } else if fn_format[1] == "signed" { - fn_name.push_str(type_to_signed_suffix(in_t2)); + fn_name.push_str(type_to_signed_suffix(in_t[1])); } else if fn_format[1] == "unsigned" { - fn_name.push_str(type_to_unsigned_suffix(in_t2)); + fn_name.push_str(type_to_unsigned_suffix(in_t[1])); } else if fn_format[1] == "doubleself" { - fn_name.push_str(type_to_double_suffixes(out_t, in_t2)); + fn_name.push_str(type_to_double_suffixes(out_t, in_t[1])); + } else if fn_format[1] == "noqself" { + fn_name.push_str(type_to_noq_suffix(in_t[1])); + } else if fn_format[1] == "nosuffix" { } else { fn_name.push_str(&fn_format[1]); }; @@ -991,11 +1054,12 @@ fn main() -> io::Result<()> { let mut link_arm: Option<String> = None; let mut link_aarch64: Option<String> = None; let mut para_num = 2; - let mut double_suffixes = false; + let mut suffix: Suffix = Normal; let mut a: Vec<String> = Vec::new(); let mut b: Vec<String> = Vec::new(); + let mut c: Vec<String> = Vec::new(); let mut fixed: Vec<String> = Vec::new(); - let mut current_tests: Vec<(Vec<String>, Vec<String>, Vec<String>)> = Vec::new(); + let mut current_tests: Vec<(Vec<String>, Vec<String>, Vec<String>, Vec<String>)> = Vec::new(); let mut multi_fn: Vec<String> = Vec::new(); // @@ -1067,9 +1131,10 @@ mod test { link_arm = None; current_tests = Vec::new(); para_num = 2; - double_suffixes = false; + suffix = Normal; a = Vec::new(); b = Vec::new(); + c = Vec::new(); fixed = Vec::new(); multi_fn = Vec::new(); } else if line.starts_with("//") { @@ -1084,16 +1149,20 @@ mod test { } else if line.starts_with("aarch64 = ") { current_aarch64 = Some(String::from(&line[10..])); } else if line.starts_with("double-suffixes") { - double_suffixes = true; + suffix = Double; + } else if line.starts_with("no-q") { + suffix = NoQ; } else if line.starts_with("a = ") { a = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("b = ") { b = line[4..].split(',').map(|v| v.trim().to_string()).collect(); + } else if line.starts_with("c = ") { + c = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("fixed = ") { fixed = line[8..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("validate ") { let e = line[9..].split(',').map(|v| v.trim().to_string()).collect(); - current_tests.push((a.clone(), b.clone(), e)); + current_tests.push((a.clone(), b.clone(), c.clone(), e)); } else if line.starts_with("link-aarch64 = ") { link_aarch64 = Some(String::from(&line[15..])); } else if line.starts_with("link-arm = ") { @@ -1116,26 +1185,27 @@ mod test { for line in types { let spec: Vec<&str> = line.split(':').map(|e| e.trim()).collect(); - let in_t; - let in_t2; + let in_t: [&str; 3]; let out_t; if spec.len() == 1 { - in_t = spec[0]; - in_t2 = spec[0]; + in_t = [spec[0], spec[0], spec[0]]; out_t = spec[0]; } else if spec.len() == 2 { - in_t = spec[0]; - in_t2 = spec[0]; + in_t = [spec[0], spec[0], spec[0]]; out_t = spec[1]; } else if spec.len() == 3 { - in_t = spec[0]; - in_t2 = spec[1]; + in_t = [spec[0], spec[1], spec[1]]; out_t = spec[2]; + } else if spec.len() == 4 { + in_t = [spec[0], spec[1], spec[2]]; + out_t = spec[3]; } else { panic!("Bad spec: {}", line) } if b.len() == 0 { para_num = 1; + } else if c.len() != 0 { + para_num = 3; } let current_name = current_name.clone().unwrap(); if let Some(current_arm) = current_arm.clone() { @@ -1148,10 +1218,9 @@ mod test { ¤t_aarch64, &link_aarch64, &in_t, - &in_t2, &out_t, ¤t_tests, - double_suffixes, + suffix, para_num, &fixed, &multi_fn, @@ -1166,10 +1235,9 @@ mod test { ¤t_aarch64, &link_aarch64, &in_t, - &in_t2, &out_t, ¤t_tests, - double_suffixes, + suffix, para_num, &fixed, &multi_fn, |
