diff options
| author | bjorn3 <bjorn3@users.noreply.github.com> | 2019-07-30 15:42:29 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-07-30 15:42:29 +0200 |
| commit | aa803f4fa664fe5e5fb91bd1e8679b37c9dde3f8 (patch) | |
| tree | bf6d5fb531777ca2a2113edaab643e724653bd60 /example | |
| parent | 3f7660788042796773fd12e80dccd9af997f7242 (diff) | |
| parent | 1028fbb68c5f2caaeda4679a59258275b8c3d26e (diff) | |
| download | rust-aa803f4fa664fe5e5fb91bd1e8679b37c9dde3f8.tar.gz rust-aa803f4fa664fe5e5fb91bd1e8679b37c9dde3f8.zip | |
Merge pull request #650 from bjorn3/simd_emulation
Simd emulation
Diffstat (limited to 'example')
| -rw-r--r-- | example/mini_core_hello_world.rs | 10 | ||||
| -rw-r--r-- | example/std_example.rs | 125 |
2 files changed, 135 insertions, 0 deletions
diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs index 641f0c98912..380bc487bcc 100644 --- a/example/mini_core_hello_world.rs +++ b/example/mini_core_hello_world.rs @@ -117,6 +117,14 @@ impl<T: ?Sized, U: ?Sized> CoerceUnsized<Unique<U>> for Unique<T> where T: Unsiz fn take_f32(_f: f32) {} fn take_unique(_u: Unique<()>) {} +fn return_u128_pair() -> (u128, u128) { + (0, 0) +} + +fn call_return_u128_pair() { + return_u128_pair(); +} + fn main() { take_unique(Unique { pointer: 0 as *const (), @@ -124,6 +132,8 @@ fn main() { }); take_f32(0.1); + call_return_u128_pair(); + //return; unsafe { diff --git a/example/std_example.rs b/example/std_example.rs index 2a9df999559..33523a12871 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -1,8 +1,10 @@ #![feature(core_intrinsics)] +use std::arch::x86_64::*; use std::io::Write; use std::intrinsics; + fn main() { let _ = ::std::iter::repeat('a' as u8).take(10).collect::<Vec<_>>(); let stderr = ::std::io::stderr(); @@ -43,6 +45,129 @@ fn main() { assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 >> 64, 0xFEDCBA98765432u128); assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 as i128 >> 64, 0xFEDCBA98765432i128); assert_eq!(353985398u128 * 932490u128, 330087843781020u128); + + unsafe { + test_simd(); + } +} + +#[target_feature(enable = "sse2")] +unsafe fn test_simd() { + let x = _mm_setzero_si128(); + let y = _mm_set1_epi16(7); + let or = _mm_or_si128(x, y); + let cmp_eq = _mm_cmpeq_epi8(y, y); + let cmp_lt = _mm_cmplt_epi8(y, y); + + assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]); + assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_eq), [0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff]); + assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]); + + test_mm_slli_si128(); + test_mm_movemask_epi8(); + test_mm256_movemask_epi8(); + test_mm_add_epi8(); + test_mm_add_pd(); + + let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); + assert_eq!(mask1, 1); +} + +#[target_feature(enable = "sse2")] +unsafe fn test_mm_slli_si128() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, 1); + let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, 15); + let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + assert_eq_m128i(r, e); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, 16); + assert_eq_m128i(r, _mm_set1_epi8(0)); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, -1); + assert_eq_m128i(_mm_set1_epi8(0), r); + + #[rustfmt::skip] + let a = _mm_setr_epi8( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ); + let r = _mm_slli_si128(a, -0x80000000); + assert_eq_m128i(r, _mm_set1_epi8(0)); +} + +#[target_feature(enable = "sse2")] +unsafe fn test_mm_movemask_epi8() { + #[rustfmt::skip] + let a = _mm_setr_epi8( + 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, + 0b0101, 0b1111_0000u8 as i8, 0, 0, + 0, 0, 0b1111_0000u8 as i8, 0b0101, + 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, + ); + let r = _mm_movemask_epi8(a); + assert_eq!(r, 0b10100100_00100101); +} + +#[target_feature(enable = "avx2")] +unsafe fn test_mm256_movemask_epi8() { + let a = _mm256_set1_epi8(-1); + let r = _mm256_movemask_epi8(a); + let e = -1; + assert_eq!(r, e); +} + +#[target_feature(enable = "sse2")] +unsafe fn test_mm_add_epi8() { + let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #[rustfmt::skip] + let b = _mm_setr_epi8( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + let r = _mm_add_epi8(a, b); + #[rustfmt::skip] + let e = _mm_setr_epi8( + 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, + ); + assert_eq_m128i(r, e); +} + +#[target_feature(enable = "sse2")] +unsafe fn test_mm_add_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_add_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); +} + +fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) { + unsafe { + assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x)); + } +} + +#[target_feature(enable = "sse2")] +pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { + if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { + panic!("{:?} != {:?}", a, b); + } } #[derive(PartialEq)] |
