From 69526d464fc0ef021beb1718d68035555f30c33d Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Tue, 30 Jul 2019 14:37:20 +0200 Subject: Implement some float simd intrinsics --- example/std_example.rs | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) (limited to 'example') diff --git a/example/std_example.rs b/example/std_example.rs index 8a43af5bd80..33523a12871 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -1,5 +1,6 @@ #![feature(core_intrinsics)] +use std::arch::x86_64::*; use std::io::Write; use std::intrinsics; @@ -52,8 +53,6 @@ fn main() { #[target_feature(enable = "sse2")] unsafe fn test_simd() { - use std::arch::x86_64::*; - let x = _mm_setzero_si128(); let y = _mm_set1_epi16(7); let or = _mm_or_si128(x, y); @@ -67,6 +66,8 @@ unsafe fn test_simd() { test_mm_slli_si128(); test_mm_movemask_epi8(); test_mm256_movemask_epi8(); + test_mm_add_epi8(); + test_mm_add_pd(); let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); assert_eq!(mask1, 1); @@ -74,8 +75,6 @@ unsafe fn test_simd() { #[target_feature(enable = "sse2")] unsafe fn test_mm_slli_si128() { - use std::arch::x86_64::*; - #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, @@ -116,8 +115,6 @@ unsafe fn test_mm_slli_si128() { #[target_feature(enable = "sse2")] unsafe fn test_mm_movemask_epi8() { - use std::arch::x86_64::*; - #[rustfmt::skip] let a = _mm_setr_epi8( 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, @@ -131,20 +128,48 @@ unsafe fn test_mm_movemask_epi8() { #[target_feature(enable = "avx2")] unsafe fn test_mm256_movemask_epi8() { - use std::arch::x86_64::*; - let a = _mm256_set1_epi8(-1); let r = _mm256_movemask_epi8(a); let e = -1; assert_eq!(r, e); } +#[target_feature(enable = "sse2")] +unsafe fn test_mm_add_epi8() { + let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm_add_epi8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, + ); + assert_eq_m128i(r, e); +} + +#[target_feature(enable = "sse2")] +unsafe fn test_mm_add_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_add_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); +} + fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) { unsafe { assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x)); } } +#[target_feature(enable = "sse2")] +pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { + if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { + panic!("{:?} != {:?}", a, b); + } +} + #[derive(PartialEq)] enum LoopState { Continue(()), -- cgit 1.4.1-3-g733a5