about summary refs log tree commit diff
path: root/example
diff options
context:
space:
mode:
author: bjorn3 <bjorn3@users.noreply.github.com> 2019-07-30 15:42:29 +0200
committer: GitHub <noreply@github.com> 2019-07-30 15:42:29 +0200
commit: aa803f4fa664fe5e5fb91bd1e8679b37c9dde3f8 (patch)
tree: bf6d5fb531777ca2a2113edaab643e724653bd60 /example
parent: 3f7660788042796773fd12e80dccd9af997f7242 (diff)
parent: 1028fbb68c5f2caaeda4679a59258275b8c3d26e (diff)
download: rust-aa803f4fa664fe5e5fb91bd1e8679b37c9dde3f8.tar.gz
download: rust-aa803f4fa664fe5e5fb91bd1e8679b37c9dde3f8.zip
Merge pull request #650 from bjorn3/simd_emulation
Simd emulation
Diffstat (limited to 'example')
-rw-r--r-- example/mini_core_hello_world.rs 10
-rw-r--r-- example/std_example.rs 125
2 files changed, 135 insertions, 0 deletions
diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs
index 641f0c98912..380bc487bcc 100644
--- a/example/mini_core_hello_world.rs
+++ b/example/mini_core_hello_world.rs
@@ -117,6 +117,14 @@ impl<T: ?Sized, U: ?Sized> CoerceUnsized<Unique<U>> for Unique<T> where T: Unsiz
 fn take_f32(_f: f32) {}
 fn take_unique(_u: Unique<()>) {}
 
+fn return_u128_pair() -> (u128, u128) { // Returns a tuple of two zero `u128` values.
+    (0, 0)
+}
+
+fn call_return_u128_pair() { // Calls `return_u128_pair` and discards the returned tuple.
+    return_u128_pair();
+}
+
 fn main() {
     take_unique(Unique {
         pointer: 0 as *const (),
@@ -124,6 +132,8 @@ fn main() {
     });
     take_f32(0.1);
 
+    call_return_u128_pair();
+
     //return;
 
     unsafe {
diff --git a/example/std_example.rs b/example/std_example.rs
index 2a9df999559..33523a12871 100644
--- a/example/std_example.rs
+++ b/example/std_example.rs
@@ -1,8 +1,10 @@
 #![feature(core_intrinsics)]
 
+use std::arch::x86_64::*;
 use std::io::Write;
 use std::intrinsics;
 
+
 fn main() {
     let _ = ::std::iter::repeat('a' as u8).take(10).collect::<Vec<_>>();
     let stderr = ::std::io::stderr();
@@ -43,6 +45,129 @@ fn main() {
     assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 >> 64, 0xFEDCBA98765432u128);
     assert_eq!(0xFEDCBA987654321123456789ABCDEFu128 as i128 >> 64, 0xFEDCBA98765432i128);
     assert_eq!(353985398u128 * 932490u128, 330087843781020u128);
+
+    unsafe {
+        test_simd();
+    }
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn test_simd() { // Runs a few inline SSE2 checks, then each per-intrinsic test function.
+    let x = _mm_setzero_si128();
+    let y = _mm_set1_epi16(7);
+    let or = _mm_or_si128(x, y); // OR with zero: asserted below to be 7 in every 16-bit lane
+    let cmp_eq = _mm_cmpeq_epi8(y, y); // `y` compared for equality with itself: asserted below to be all ones
+    let cmp_lt = _mm_cmplt_epi8(y, y); // `y` compared less-than with itself: asserted below to be all zeros
+
+    assert_eq!(std::mem::transmute::<_, [u16; 8]>(or), [7, 7, 7, 7, 7, 7, 7, 7]);
+    assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_eq), [0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff]);
+    assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]);
+
+    test_mm_slli_si128();
+    test_mm_movemask_epi8();
+    test_mm256_movemask_epi8(); // NOTE(review): declared with the "avx2" feature but called here without a visible runtime feature check — confirm this is intended
+    test_mm_add_epi8();
+    test_mm_add_pd();
+
+    let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); // only the first byte is 0xFF; `dbg!` prints the vector and passes it through
+    assert_eq!(mask1, 1);
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn test_mm_slli_si128() { // Exercises `_mm_slli_si128` with shift counts 1, 15, 16, -1 and -0x80000000.
+    #[rustfmt::skip]
+    let a = _mm_setr_epi8(
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+    );
+    let r = _mm_slli_si128(a, 1); // expected below: every byte moves up one position and a zero enters at index 0
+    let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+    assert_eq_m128i(r, e);
+
+    #[rustfmt::skip]
+    let a = _mm_setr_epi8(
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+    );
+    let r = _mm_slli_si128(a, 15); // expected below: only the first input byte remains, in the last position
+    let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
+    assert_eq_m128i(r, e);
+
+    #[rustfmt::skip]
+    let a = _mm_setr_epi8(
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+    );
+    let r = _mm_slli_si128(a, 16); // a count equal to the vector width: expected to be all zeros
+    assert_eq_m128i(r, _mm_set1_epi8(0));
+
+    #[rustfmt::skip]
+    let a = _mm_setr_epi8(
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+    );
+    let r = _mm_slli_si128(a, -1); // a negative count: expected to be all zeros
+    assert_eq_m128i(_mm_set1_epi8(0), r);
+
+    #[rustfmt::skip]
+    let a = _mm_setr_epi8(
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+    );
+    let r = _mm_slli_si128(a, -0x80000000); // the most negative `i32` count: expected to be all zeros
+    assert_eq_m128i(r, _mm_set1_epi8(0));
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn test_mm_movemask_epi8() { // Checks `_mm_movemask_epi8` on bytes with and without their top bit set.
+    #[rustfmt::skip]
+    let a = _mm_setr_epi8(
+        0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
+        0b0101, 0b1111_0000u8 as i8, 0, 0,
+        0, 0, 0b1111_0000u8 as i8, 0b0101,
+        0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
+    );
+    let r = _mm_movemask_epi8(a);
+    assert_eq!(r, 0b10100100_00100101); // set bits 0, 2, 5, 10, 13, 15 match the indices of the bytes whose top bit is set
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn test_mm256_movemask_epi8() { // Checks `_mm256_movemask_epi8` on a vector whose bytes are all -1.
+    let a = _mm256_set1_epi8(-1);
+    let r = _mm256_movemask_epi8(a);
+    let e = -1; // expected mask: -1, i.e. every bit of the integer result set
+    assert_eq!(r, e);
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn test_mm_add_epi8() { // Checks `_mm_add_epi8` against a hand-written element-wise sum.
+    let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+    #[rustfmt::skip]
+    let b = _mm_setr_epi8(
+        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    );
+    let r = _mm_add_epi8(a, b);
+    #[rustfmt::skip]
+    let e = _mm_setr_epi8(
+        16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
+    );
+    assert_eq_m128i(r, e); // each element of `e` is the sum of the elements of `a` and `b` at the same index
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn test_mm_add_pd() { // Checks `_mm_add_pd` on two pairs of doubles.
+    let a = _mm_setr_pd(1.0, 2.0);
+    let b = _mm_setr_pd(5.0, 10.0);
+    let r = _mm_add_pd(a, b);
+    assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); // expected: (1.0 + 5.0, 2.0 + 10.0)
+}
+
+fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) { // Panics unless `x` and `y` hold the same 16 bytes.
+    unsafe { // SAFETY: `__m128i` and `[u8; 16]` are both 16 bytes, and any bit pattern is a valid `[u8; 16]`.
+        assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(y)); // the right-hand side must be `y`; comparing `x` with itself can never fail
+    }
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { // Panics, printing both vectors, unless the lane-wise equality mask is 0b11.
+    if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { // 0b11: both bits of the comparison mask are set
+        panic!("{:?} != {:?}", a, b);
+    }
 }
 
 #[derive(PartialEq)]