about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse.rs2013
-rw-r--r--src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse2.rs1531
2 files changed, 1760 insertions, 1784 deletions
diff --git a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse.rs b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse.rs
index a62a5ee3781..6f7ab3b3c9f 100644
--- a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse.rs
+++ b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse.rs
@@ -1,1107 +1,1088 @@
+// We're testing x86 target specific features
+//@only-target: x86_64 i686
+
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+use std::f32::NAN;
+use std::mem::transmute;
+
 fn main() {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    {
-        assert!(is_x86_feature_detected!("sse"));
+    assert!(is_x86_feature_detected!("sse"));
 
-        unsafe {
-            tests::test_sse();
-        }
+    unsafe {
+        test_sse();
     }
 }
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-mod tests {
-    #[cfg(target_arch = "x86")]
-    use std::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use std::arch::x86_64::*;
-    use std::f32::NAN;
-    use std::mem::transmute;
-
-    macro_rules! assert_approx_eq {
-        ($a:expr, $b:expr, $eps:expr) => {{
-            let (a, b) = (&$a, &$b);
-            assert!(
-                (*a - *b).abs() < $eps,
-                "assertion failed: `(left !== right)` \
+macro_rules! assert_approx_eq {
+    ($a:expr, $b:expr, $eps:expr) => {{
+        let (a, b) = (&$a, &$b);
+        assert!(
+            (*a - *b).abs() < $eps,
+            "assertion failed: `(left !== right)` \
              (left: `{:?}`, right: `{:?}`, expect diff: `{:?}`, real diff: `{:?}`)",
-                *a,
-                *b,
-                $eps,
-                (*a - *b).abs()
-            );
-        }};
-    }
+            *a,
+            *b,
+            $eps,
+            (*a - *b).abs()
+        );
+    }};
+}
+
+#[target_feature(enable = "sse")]
+unsafe fn test_sse() {
+    // Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse.rs
 
     #[target_feature(enable = "sse")]
-    pub(super) unsafe fn test_sse() {
-        // Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse.rs
-
-        #[target_feature(enable = "sse")]
-        unsafe fn assert_eq_m128(a: __m128, b: __m128) {
-            let r = _mm_cmpeq_ps(a, b);
-            if _mm_movemask_ps(r) != 0b1111 {
-                panic!("{:?} != {:?}", a, b);
-            }
+    unsafe fn assert_eq_m128(a: __m128, b: __m128) {
+        let r = _mm_cmpeq_ps(a, b);
+        if _mm_movemask_ps(r) != 0b1111 {
+            panic!("{:?} != {:?}", a, b);
         }
+    }
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_add_ss() {
-            let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
-            let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
-            let r = _mm_add_ss(a, b);
-            assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
-        }
-        test_mm_add_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_sub_ss() {
-            let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
-            let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
-            let r = _mm_sub_ss(a, b);
-            assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
-        }
-        test_mm_sub_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_mul_ss() {
-            let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
-            let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
-            let r = _mm_mul_ss(a, b);
-            assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
-        }
-        test_mm_mul_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_div_ss() {
-            let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
-            let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
-            let r = _mm_div_ss(a, b);
-            assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
-        }
-        test_mm_div_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_sqrt_ss() {
-            let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
-            let r = _mm_sqrt_ss(a);
-            let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
-            assert_eq_m128(r, e);
-        }
-        test_mm_sqrt_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_sqrt_ps() {
-            let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
-            let r = _mm_sqrt_ps(a);
-            let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
-            assert_eq_m128(r, e);
-        }
-        test_mm_sqrt_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_rcp_ss() {
-            let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
-            let r = _mm_rcp_ss(a);
-            let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
-            let rel_err = 0.00048828125;
-
-            let r: [f32; 4] = transmute(r);
-            let e: [f32; 4] = transmute(e);
-            assert_approx_eq!(r[0], e[0], 2. * rel_err);
-            for i in 1..4 {
-                assert_eq!(r[i], e[i]);
-            }
-        }
-        test_mm_rcp_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_rcp_ps() {
-            let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
-            let r = _mm_rcp_ps(a);
-            let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
-            let rel_err = 0.00048828125;
-
-            let r: [f32; 4] = transmute(r);
-            let e: [f32; 4] = transmute(e);
-            for i in 0..4 {
-                assert_approx_eq!(r[i], e[i], 2. * rel_err);
-            }
-        }
-        test_mm_rcp_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_rsqrt_ss() {
-            let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
-            let r = _mm_rsqrt_ss(a);
-            let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
-            let rel_err = 0.00048828125;
-
-            let r: [f32; 4] = transmute(r);
-            let e: [f32; 4] = transmute(e);
-            assert_approx_eq!(r[0], e[0], 2. * rel_err);
-            for i in 1..4 {
-                assert_eq!(r[i], e[i]);
-            }
-        }
-        test_mm_rsqrt_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_rsqrt_ps() {
-            let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
-            let r = _mm_rsqrt_ps(a);
-            let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
-            let rel_err = 0.00048828125;
-
-            let r: [f32; 4] = transmute(r);
-            let e: [f32; 4] = transmute(e);
-            for i in 0..4 {
-                assert_approx_eq!(r[i], e[i], 2. * rel_err);
-            }
-        }
-        test_mm_rsqrt_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_min_ss() {
-            let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
-            let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
-            let r = _mm_min_ss(a, b);
-            assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
-        }
-        test_mm_min_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_min_ps() {
-            let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
-            let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
-            let r = _mm_min_ps(a, b);
-            assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
-
-            // `_mm_min_ps` can **not** be implemented using the `simd_min` rust intrinsic because
-            // the semantics of `simd_min` are different to those of `_mm_min_ps` regarding handling
-            // of `-0.0`.
-            let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
-            let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
-            let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
-            let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
-            let a: [u8; 16] = transmute(a);
-            let b: [u8; 16] = transmute(b);
-            assert_eq!(r1, b);
-            assert_eq!(r2, a);
-            assert_ne!(a, b); // sanity check that -0.0 is actually present
-        }
-        test_mm_min_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_max_ss() {
-            let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
-            let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
-            let r = _mm_max_ss(a, b);
-            assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
-        }
-        test_mm_max_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_max_ps() {
-            let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
-            let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
-            let r = _mm_max_ps(a, b);
-            assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
-
-            // `_mm_max_ps` can **not** be implemented using the `simd_max` rust intrinsic because
-            // the semantics of `simd_max` are different to those of `_mm_max_ps` regarding handling
-            // of `-0.0`.
-            let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
-            let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
-            let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
-            let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
-            let a: [u8; 16] = transmute(a);
-            let b: [u8; 16] = transmute(b);
-            assert_eq!(r1, b);
-            assert_eq!(r2, a);
-            assert_ne!(a, b); // sanity check that -0.0 is actually present
-        }
-        test_mm_max_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpeq_ss() {
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
-            let r: [u32; 4] = transmute(_mm_cmpeq_ss(a, b));
-            let e: [u32; 4] = transmute(_mm_setr_ps(transmute(0u32), 2.0, 3.0, 4.0));
-            assert_eq!(r, e);
-
-            let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let r2: [u32; 4] = transmute(_mm_cmpeq_ss(a, b2));
-            let e2: [u32; 4] = transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0));
-            assert_eq!(r2, e2);
-        }
-        test_mm_cmpeq_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmplt_ss() {
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = 0u32; // a.extract(0) < b.extract(0)
-            let c1 = 0u32; // a.extract(0) < c.extract(0)
-            let d1 = !0u32; // a.extract(0) < d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmplt_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmplt_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmplt_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmplt_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmple_ss() {
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = 0u32; // a.extract(0) <= b.extract(0)
-            let c1 = !0u32; // a.extract(0) <= c.extract(0)
-            let d1 = !0u32; // a.extract(0) <= d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmple_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmple_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmple_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmple_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpgt_ss() {
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = !0u32; // a.extract(0) > b.extract(0)
-            let c1 = 0u32; // a.extract(0) > c.extract(0)
-            let d1 = 0u32; // a.extract(0) > d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmpgt_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmpgt_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmpgt_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmpgt_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpge_ss() {
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = !0u32; // a.extract(0) >= b.extract(0)
-            let c1 = !0u32; // a.extract(0) >= c.extract(0)
-            let d1 = 0u32; // a.extract(0) >= d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmpge_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmpge_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmpge_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmpge_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpneq_ss() {
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = !0u32; // a.extract(0) != b.extract(0)
-            let c1 = 0u32; // a.extract(0) != c.extract(0)
-            let d1 = !0u32; // a.extract(0) != d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmpneq_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmpneq_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmpneq_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmpneq_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpnlt_ss() {
-            // TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there
-            // must be a difference. It may have to do with behavior in the
-            // presence of NaNs (signaling or quiet). If so, we should add tests
-            // for those.
-
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = !0u32; // a.extract(0) >= b.extract(0)
-            let c1 = !0u32; // a.extract(0) >= c.extract(0)
-            let d1 = 0u32; // a.extract(0) >= d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmpnlt_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmpnlt_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmpnlt_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmpnlt_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpnle_ss() {
-            // TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there
-            // must be a difference. It may have to do with behavior in the
-            // presence
-            // of NaNs (signaling or quiet). If so, we should add tests for those.
-
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = !0u32; // a.extract(0) > b.extract(0)
-            let c1 = 0u32; // a.extract(0) > c.extract(0)
-            let d1 = 0u32; // a.extract(0) > d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmpnle_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmpnle_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmpnle_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmpnle_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpngt_ss() {
-            // TODO: this test is exactly the same as for `_mm_cmple_ss`, but there
-            // must be a difference. It may have to do with behavior in the
-            // presence of NaNs (signaling or quiet). If so, we should add tests
-            // for those.
-
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = 0u32; // a.extract(0) <= b.extract(0)
-            let c1 = !0u32; // a.extract(0) <= c.extract(0)
-            let d1 = !0u32; // a.extract(0) <= d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmpngt_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmpngt_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmpngt_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmpngt_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpnge_ss() {
-            // TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there
-            // must be a difference. It may have to do with behavior in the
-            // presence of NaNs (signaling or quiet). If so, we should add tests
-            // for those.
-
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = 0u32; // a.extract(0) < b.extract(0)
-            let c1 = 0u32; // a.extract(0) < c.extract(0)
-            let d1 = !0u32; // a.extract(0) < d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmpnge_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmpnge_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmpnge_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmpnge_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpord_ss() {
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = !0u32; // a.extract(0) ord b.extract(0)
-            let c1 = 0u32; // a.extract(0) ord c.extract(0)
-            let d1 = !0u32; // a.extract(0) ord d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmpord_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmpord_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmpord_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmpord_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpunord_ss() {
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
-            let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
-            let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
-
-            let b1 = 0u32; // a.extract(0) unord b.extract(0)
-            let c1 = !0u32; // a.extract(0) unord c.extract(0)
-            let d1 = 0u32; // a.extract(0) unord d.extract(0)
-
-            let rb: [u32; 4] = transmute(_mm_cmpunord_ss(a, b));
-            let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
-            assert_eq!(rb, eb);
-
-            let rc: [u32; 4] = transmute(_mm_cmpunord_ss(a, c));
-            let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
-            assert_eq!(rc, ec);
-
-            let rd: [u32; 4] = transmute(_mm_cmpunord_ss(a, d));
-            let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
-            assert_eq!(rd, ed);
-        }
-        test_mm_cmpunord_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpeq_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [fls, fls, tru, fls];
-            let r: [u32; 4] = transmute(_mm_cmpeq_ps(a, b));
-            assert_eq!(r, e);
-        }
-        test_mm_cmpeq_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmplt_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [tru, fls, fls, fls];
-            let r: [u32; 4] = transmute(_mm_cmplt_ps(a, b));
-            assert_eq!(r, e);
-        }
-        test_mm_cmplt_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmple_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [tru, fls, tru, fls];
-            let r: [u32; 4] = transmute(_mm_cmple_ps(a, b));
-            assert_eq!(r, e);
-        }
-        test_mm_cmple_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpgt_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [fls, tru, fls, fls];
-            let r: [u32; 4] = transmute(_mm_cmpgt_ps(a, b));
-            assert_eq!(r, e);
-        }
-        test_mm_cmpgt_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpge_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [fls, tru, tru, fls];
-            let r: [u32; 4] = transmute(_mm_cmpge_ps(a, b));
-            assert_eq!(r, e);
-        }
-        test_mm_cmpge_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpneq_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [tru, tru, fls, tru];
-            let r: [u32; 4] = transmute(_mm_cmpneq_ps(a, b));
-            assert_eq!(r, e);
-        }
-        test_mm_cmpneq_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpnlt_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [fls, tru, tru, tru];
-            let r: [u32; 4] = transmute(_mm_cmpnlt_ps(a, b));
-            assert_eq!(r, e);
-        }
-        test_mm_cmpnlt_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpnle_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [fls, tru, fls, tru];
-            let r: [u32; 4] = transmute(_mm_cmpnle_ps(a, b));
-            assert_eq!(r, e);
-        }
-        test_mm_cmpnle_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpngt_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [tru, fls, tru, tru];
-            let r: [u32; 4] = transmute(_mm_cmpngt_ps(a, b));
-            assert_eq!(r, e);
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_add_ss() {
+        let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
+        let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
+        let r = _mm_add_ss(a, b);
+        assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
+    }
+    test_mm_add_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_sub_ss() {
+        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let r = _mm_sub_ss(a, b);
+        assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
+    }
+    test_mm_sub_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_mul_ss() {
+        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let r = _mm_mul_ss(a, b);
+        assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
+    }
+    test_mm_mul_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_div_ss() {
+        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let r = _mm_div_ss(a, b);
+        assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
+    }
+    test_mm_div_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_sqrt_ss() {
+        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
+        let r = _mm_sqrt_ss(a);
+        let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
+        assert_eq_m128(r, e);
+    }
+    test_mm_sqrt_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_sqrt_ps() {
+        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
+        let r = _mm_sqrt_ps(a);
+        let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
+        assert_eq_m128(r, e);
+    }
+    test_mm_sqrt_ps();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_rcp_ss() {
+        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
+        let r = _mm_rcp_ss(a);
+        let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
+        let rel_err = 0.00048828125;
+
+        let r: [f32; 4] = transmute(r);
+        let e: [f32; 4] = transmute(e);
+        assert_approx_eq!(r[0], e[0], 2. * rel_err);
+        for i in 1..4 {
+            assert_eq!(r[i], e[i]);
         }
-        test_mm_cmpngt_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpnge_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
-            let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [tru, fls, fls, tru];
-            let r: [u32; 4] = transmute(_mm_cmpnge_ps(a, b));
-            assert_eq!(r, e);
+    }
+    test_mm_rcp_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_rcp_ps() {
+        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
+        let r = _mm_rcp_ps(a);
+        let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
+        let rel_err = 0.00048828125;
+
+        let r: [f32; 4] = transmute(r);
+        let e: [f32; 4] = transmute(e);
+        for i in 0..4 {
+            assert_approx_eq!(r[i], e[i], 2. * rel_err);
         }
-        test_mm_cmpnge_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpord_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
-            let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [tru, fls, fls, fls];
-            let r: [u32; 4] = transmute(_mm_cmpord_ps(a, b));
-            assert_eq!(r, e);
+    }
+    test_mm_rcp_ps();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_rsqrt_ss() {
+        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
+        let r = _mm_rsqrt_ss(a);
+        let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
+        let rel_err = 0.00048828125;
+
+        let r: [f32; 4] = transmute(r);
+        let e: [f32; 4] = transmute(e);
+        assert_approx_eq!(r[0], e[0], 2. * rel_err);
+        for i in 1..4 {
+            assert_eq!(r[i], e[i]);
         }
-        test_mm_cmpord_ps();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cmpunord_ps() {
-            let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
-            let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
-            let tru = !0u32;
-            let fls = 0u32;
-
-            let e = [fls, tru, tru, tru];
-            let r: [u32; 4] = transmute(_mm_cmpunord_ps(a, b));
-            assert_eq!(r, e);
+    }
+    test_mm_rsqrt_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_rsqrt_ps() {
+        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
+        let r = _mm_rsqrt_ps(a);
+        let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
+        let rel_err = 0.00048828125;
+
+        let r: [f32; 4] = transmute(r);
+        let e: [f32; 4] = transmute(e);
+        for i in 0..4 {
+            assert_approx_eq!(r[i], e[i], 2. * rel_err);
         }
-        test_mm_cmpunord_ps();
+    }
+    test_mm_rsqrt_ps();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_comieq_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_min_ss() {
+        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let r = _mm_min_ss(a, b);
+        assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
+    }
+    test_mm_min_ss();
 
-            let ee = &[1i32, 0, 0, 0];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_min_ps() {
+        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let r = _mm_min_ps(a, b);
+        assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
+
+        // `_mm_min_ps` can **not** be implemented using the `simd_min` rust intrinsic because
+        // the semantics of `simd_min` are different to those of `_mm_min_ps` regarding handling
+        // of `-0.0`.
+        let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
+        let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
+        let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
+        let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
+        let a: [u8; 16] = transmute(a);
+        let b: [u8; 16] = transmute(b);
+        assert_eq!(r1, b);
+        assert_eq!(r2, a);
+        assert_ne!(a, b); // sanity check that -0.0 is actually present
+    }
+    test_mm_min_ps();
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_max_ss() {
+        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let r = _mm_max_ss(a, b);
+        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
+    }
+    test_mm_max_ss();
 
-                let r = _mm_comieq_ss(a, b);
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_max_ps() {
+        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let r = _mm_max_ps(a, b);
+        assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
+
+        // `_mm_max_ps` can **not** be implemented using the `simd_max` rust intrinsic because
+        // the semantics of `simd_max` are different to those of `_mm_max_ps` regarding handling
+        // of `-0.0`.
+        let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
+        let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
+        let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
+        let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
+        let a: [u8; 16] = transmute(a);
+        let b: [u8; 16] = transmute(b);
+        assert_eq!(r1, b);
+        assert_eq!(r2, a);
+        assert_ne!(a, b); // sanity check that -0.0 is actually present
+    }
+    test_mm_max_ps();
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
-        }
-        test_mm_comieq_ss();
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpeq_ss() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
+        let r: [u32; 4] = transmute(_mm_cmpeq_ss(a, b));
+        let e: [u32; 4] = transmute(_mm_setr_ps(transmute(0u32), 2.0, 3.0, 4.0));
+        assert_eq!(r, e);
+
+        let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let r2: [u32; 4] = transmute(_mm_cmpeq_ss(a, b2));
+        let e2: [u32; 4] = transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0));
+        assert_eq!(r2, e2);
+    }
+    test_mm_cmpeq_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmplt_ss() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = 0u32; // a.extract(0) < b.extract(0)
+        let c1 = 0u32; // a.extract(0) < c.extract(0)
+        let d1 = !0u32; // a.extract(0) < d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmplt_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmplt_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmplt_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmplt_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmple_ss() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = 0u32; // a.extract(0) <= b.extract(0)
+        let c1 = !0u32; // a.extract(0) <= c.extract(0)
+        let d1 = !0u32; // a.extract(0) <= d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmple_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmple_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmple_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmple_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpgt_ss() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = !0u32; // a.extract(0) > b.extract(0)
+        let c1 = 0u32; // a.extract(0) > c.extract(0)
+        let d1 = 0u32; // a.extract(0) > d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmpgt_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmpgt_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmpgt_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmpgt_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpge_ss() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = !0u32; // a.extract(0) >= b.extract(0)
+        let c1 = !0u32; // a.extract(0) >= c.extract(0)
+        let d1 = 0u32; // a.extract(0) >= d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmpge_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmpge_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmpge_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmpge_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpneq_ss() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = !0u32; // a.extract(0) != b.extract(0)
+        let c1 = 0u32; // a.extract(0) != c.extract(0)
+        let d1 = !0u32; // a.extract(0) != d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmpneq_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmpneq_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmpneq_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmpneq_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpnlt_ss() {
+        // TODO: this test is exactly the same as for `_mm_cmpge_ss`. The two
+        // should differ once an operand is NaN: `test_mm_cmpnlt_ps` expects
+        // all-ones for its NaN lane where `test_mm_cmpge_ps` expects zero.
+        // A NaN case should be added here to tell them apart.
+
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = !0u32; // a.extract(0) >= b.extract(0)
+        let c1 = !0u32; // a.extract(0) >= c.extract(0)
+        let d1 = 0u32; // a.extract(0) >= d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmpnlt_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmpnlt_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmpnlt_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmpnlt_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpnle_ss() {
+        // TODO: this test is exactly the same as for `_mm_cmpgt_ss`. The two
+        // should differ once an operand is NaN: `test_mm_cmpnle_ps` expects
+        // all-ones for its NaN lane where `test_mm_cmpgt_ps` expects zero.
+        // A NaN case should be added here to tell them apart.
+
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = !0u32; // a.extract(0) > b.extract(0)
+        let c1 = 0u32; // a.extract(0) > c.extract(0)
+        let d1 = 0u32; // a.extract(0) > d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmpnle_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmpnle_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmpnle_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmpnle_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpngt_ss() {
+        // TODO: this test is exactly the same as for `_mm_cmple_ss`. The two
+        // should differ once an operand is NaN: `test_mm_cmpngt_ps` expects
+        // all-ones for its NaN lane where `test_mm_cmple_ps` expects zero.
+        // A NaN case should be added here to tell them apart.
+
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = 0u32; // a.extract(0) <= b.extract(0)
+        let c1 = !0u32; // a.extract(0) <= c.extract(0)
+        let d1 = !0u32; // a.extract(0) <= d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmpngt_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmpngt_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmpngt_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmpngt_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpnge_ss() {
+        // TODO: this test is exactly the same as for `_mm_cmplt_ss`. The two
+        // should differ once an operand is NaN: `test_mm_cmpnge_ps` expects
+        // all-ones for its NaN lane where `test_mm_cmplt_ps` expects zero.
+        // A NaN case should be added here to tell them apart.
+
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = 0u32; // a.extract(0) < b.extract(0)
+        let c1 = 0u32; // a.extract(0) < c.extract(0)
+        let d1 = !0u32; // a.extract(0) < d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmpnge_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmpnge_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmpnge_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmpnge_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpord_ss() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = !0u32; // a.extract(0) ord b.extract(0)
+        let c1 = 0u32; // a.extract(0) ord c.extract(0)
+        let d1 = !0u32; // a.extract(0) ord d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmpord_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmpord_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmpord_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmpord_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpunord_ss() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
+        let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
+        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
+
+        let b1 = 0u32; // a.extract(0) unord b.extract(0)
+        let c1 = !0u32; // a.extract(0) unord c.extract(0)
+        let d1 = 0u32; // a.extract(0) unord d.extract(0)
+
+        let rb: [u32; 4] = transmute(_mm_cmpunord_ss(a, b));
+        let eb: [u32; 4] = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0));
+        assert_eq!(rb, eb);
+
+        let rc: [u32; 4] = transmute(_mm_cmpunord_ss(a, c));
+        let ec: [u32; 4] = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0));
+        assert_eq!(rc, ec);
+
+        let rd: [u32; 4] = transmute(_mm_cmpunord_ss(a, d));
+        let ed: [u32; 4] = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0));
+        assert_eq!(rd, ed);
+    }
+    test_mm_cmpunord_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpeq_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [fls, fls, tru, fls];
+        let r: [u32; 4] = transmute(_mm_cmpeq_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpeq_ps();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmplt_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [tru, fls, fls, fls];
+        let r: [u32; 4] = transmute(_mm_cmplt_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmplt_ps();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmple_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [tru, fls, tru, fls];
+        let r: [u32; 4] = transmute(_mm_cmple_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmple_ps();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_comilt_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpgt_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [fls, tru, fls, fls];
+        let r: [u32; 4] = transmute(_mm_cmpgt_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpgt_ps();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpge_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [fls, tru, tru, fls];
+        let r: [u32; 4] = transmute(_mm_cmpge_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpge_ps();
 
-            let ee = &[0i32, 1, 0, 0];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpneq_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [tru, tru, fls, tru];
+        let r: [u32; 4] = transmute(_mm_cmpneq_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpneq_ps();
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpnlt_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [fls, tru, tru, tru];
+        let r: [u32; 4] = transmute(_mm_cmpnlt_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpnlt_ps();
 
-                let r = _mm_comilt_ss(a, b);
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpnle_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [fls, tru, fls, tru];
+        let r: [u32; 4] = transmute(_mm_cmpnle_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpnle_ps();
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpngt_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [tru, fls, tru, tru];
+        let r: [u32; 4] = transmute(_mm_cmpngt_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpngt_ps();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpnge_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
+        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [tru, fls, fls, tru];
+        let r: [u32; 4] = transmute(_mm_cmpnge_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpnge_ps();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpord_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
+        let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [tru, fls, fls, fls];
+        let r: [u32; 4] = transmute(_mm_cmpord_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpord_ps();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cmpunord_ps() {
+        let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
+        let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
+        let tru = !0u32;
+        let fls = 0u32;
+
+        let e = [fls, tru, tru, tru];
+        let r: [u32; 4] = transmute(_mm_cmpunord_ps(a, b));
+        assert_eq!(r, e);
+    }
+    test_mm_cmpunord_ps();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_comieq_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
+
+        let ee = &[1i32, 0, 0, 0];
+
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+
+            let r = _mm_comieq_ss(a, b);
+
+            assert_eq!(
+                ee[i], r,
+                "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_comilt_ss();
+    }
+    test_mm_comieq_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_comile_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_comilt_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-            let ee = &[1i32, 1, 0, 0];
+        let ee = &[0i32, 1, 0, 0];
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-                let r = _mm_comile_ss(a, b);
+            let r = _mm_comilt_ss(a, b);
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+            assert_eq!(
+                ee[i], r,
+                "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_comile_ss();
+    }
+    test_mm_comilt_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_comigt_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_comile_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-            let ee = &[1i32, 0, 1, 0];
+        let ee = &[1i32, 1, 0, 0];
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-                let r = _mm_comige_ss(a, b);
+            let r = _mm_comile_ss(a, b);
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+            assert_eq!(
+                ee[i], r,
+                "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_comigt_ss();
+    }
+    test_mm_comile_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_comineq_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    // Checks the lane-0 "greater than" comparison `_mm_comigt_ss`, and the
+    // closely related `_mm_comige_ss` on the same inputs. Only lane 0 of `a`
+    // and `b` varies between iterations; each intrinsic yields an `i32` that
+    // is compared against the expected 1/0 value for that iteration.
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_comigt_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-            let ee = &[0i32, 1, 1, 1];
+        // Strict `>`: 0 for the equal pair and for the NaN pair (same
+        // expectation as `test_mm_ucomigt_ss`).
+        let ee = &[0i32, 0, 1, 0];
+        // `>=`: differs from `>` only for the equal pair (3.0, 3.0).
+        let ee_ge = &[1i32, 0, 1, 0];
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-                let r = _mm_comineq_ss(a, b);
+            let r = _mm_comigt_ss(a, b);
+            let r_ge = _mm_comige_ss(a, b);
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+            assert_eq!(
+                ee[i], r,
+                "_mm_comigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
+            // The original body only exercised `_mm_comige_ss`; keep that
+            // coverage alongside the `_mm_comigt_ss` check above.
+            assert_eq!(
+                ee_ge[i], r_ge,
+                "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r_ge, ee_ge[i], i
+            );
         }
-        test_mm_comineq_ss();
+    }
+    test_mm_comigt_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_ucomieq_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_comineq_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-            let ee = &[1i32, 0, 0, 0];
+        let ee = &[0i32, 1, 1, 1];
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-                let r = _mm_ucomieq_ss(a, b);
+            let r = _mm_comineq_ss(a, b);
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+            assert_eq!(
+                ee[i], r,
+                "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_ucomieq_ss();
+    }
+    test_mm_comineq_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_ucomilt_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_ucomieq_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-            let ee = &[0i32, 1, 0, 0];
+        let ee = &[1i32, 0, 0, 0];
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-                let r = _mm_ucomilt_ss(a, b);
+            let r = _mm_ucomieq_ss(a, b);
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+            assert_eq!(
+                ee[i], r,
+                "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_ucomilt_ss();
+    }
+    test_mm_ucomieq_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_ucomile_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_ucomilt_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-            let ee = &[1i32, 1, 0, 0];
+        let ee = &[0i32, 1, 0, 0];
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-                let r = _mm_ucomile_ss(a, b);
+            let r = _mm_ucomilt_ss(a, b);
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+            assert_eq!(
+                ee[i], r,
+                "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_ucomile_ss();
+    }
+    test_mm_ucomilt_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_ucomigt_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_ucomile_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-            let ee = &[0i32, 0, 1, 0];
+        let ee = &[1i32, 1, 0, 0];
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-                let r = _mm_ucomigt_ss(a, b);
+            let r = _mm_ucomile_ss(a, b);
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+            assert_eq!(
+                ee[i], r,
+                "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_ucomigt_ss();
+    }
+    test_mm_ucomile_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_ucomige_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_ucomigt_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-            let ee = &[1i32, 0, 1, 0];
+        let ee = &[0i32, 0, 1, 0];
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-                let r = _mm_ucomige_ss(a, b);
+            let r = _mm_ucomigt_ss(a, b);
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+            assert_eq!(
+                ee[i], r,
+                "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_ucomige_ss();
+    }
+    test_mm_ucomigt_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_ucomineq_ss() {
-            let aa = &[3.0f32, 12.0, 23.0, NAN];
-            let bb = &[3.0f32, 47.5, 1.5, NAN];
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_ucomige_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-            let ee = &[0i32, 1, 1, 1];
+        let ee = &[1i32, 0, 1, 0];
 
-            for i in 0..4 {
-                let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
-                let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-                let r = _mm_ucomineq_ss(a, b);
+            let r = _mm_ucomige_ss(a, b);
 
-                assert_eq!(
-                    ee[i], r,
-                    "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
-                    a, b, r, ee[i], i
-                );
-            }
+            assert_eq!(
+                ee[i], r,
+                "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_ucomineq_ss();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cvtss_si32() {
-            let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
-            let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
-            for i in 0..inputs.len() {
-                let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
-                let e = result[i];
-                let r = _mm_cvtss_si32(x);
-                assert_eq!(
-                    e, r,
-                    "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
-                    i, x, r, e
-                );
-            }
+    }
+    test_mm_ucomige_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_ucomineq_ss() {
+        let aa = &[3.0f32, 12.0, 23.0, NAN];
+        let bb = &[3.0f32, 47.5, 1.5, NAN];
+
+        let ee = &[0i32, 1, 1, 1];
+
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+
+            let r = _mm_ucomineq_ss(a, b);
+
+            assert_eq!(
+                ee[i], r,
+                "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
         }
-        test_mm_cvtss_si32();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cvttss_si32() {
-            let inputs = &[
-                (42.0f32, 42i32),
-                (-31.4, -31),
-                (-33.5, -33),
-                (-34.5, -34),
-                (10.999, 10),
-                (-5.99, -5),
-                (4.0e10, i32::MIN),
-                (4.0e-10, 0),
-                (NAN, i32::MIN),
-                (2147483500.1, 2147483520),
-            ];
-            for i in 0..inputs.len() {
-                let (xi, e) = inputs[i];
-                let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
-                let r = _mm_cvttss_si32(x);
-                assert_eq!(
-                    e, r,
-                    "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
-                    i, x, r, e
-                );
-            }
+    }
+    test_mm_ucomineq_ss();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cvtss_si32() {
+        let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
+        let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
+        for i in 0..inputs.len() {
+            let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
+            let e = result[i];
+            let r = _mm_cvtss_si32(x);
+            assert_eq!(e, r, "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}", i, x, r, e);
         }
-        test_mm_cvttss_si32();
+    }
+    test_mm_cvtss_si32();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cvtss_f32() {
-            let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
-            assert_eq!(_mm_cvtss_f32(a), 312.0134);
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cvttss_si32() {
+        let inputs = &[
+            (42.0f32, 42i32),
+            (-31.4, -31),
+            (-33.5, -33),
+            (-34.5, -34),
+            (10.999, 10),
+            (-5.99, -5),
+            (4.0e10, i32::MIN),
+            (4.0e-10, 0),
+            (NAN, i32::MIN),
+            (2147483500.1, 2147483520),
+        ];
+        for i in 0..inputs.len() {
+            let (xi, e) = inputs[i];
+            let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
+            let r = _mm_cvttss_si32(x);
+            assert_eq!(e, r, "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}", i, x, r, e);
         }
-        test_mm_cvtss_f32();
-
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cvtsi32_ss() {
-            let inputs = &[
-                (4555i32, 4555.0f32),
-                (322223333, 322223330.0),
-                (-432, -432.0),
-                (-322223333, -322223330.0),
-            ];
-
-            for i in 0..inputs.len() {
-                let (x, f) = inputs[i];
-                let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
-                let r = _mm_cvtsi32_ss(a, x);
-                let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
-                assert_eq_m128(e, r);
-            }
+    }
+    test_mm_cvttss_si32();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cvtss_f32() {
+        let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
+        assert_eq!(_mm_cvtss_f32(a), 312.0134);
+    }
+    test_mm_cvtss_f32();
+
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cvtsi32_ss() {
+        let inputs = &[
+            (4555i32, 4555.0f32),
+            (322223333, 322223330.0),
+            (-432, -432.0),
+            (-322223333, -322223330.0),
+        ];
+
+        for i in 0..inputs.len() {
+            let (x, f) = inputs[i];
+            let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
+            let r = _mm_cvtsi32_ss(a, x);
+            let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
+            assert_eq_m128(e, r);
         }
-        test_mm_cvtsi32_ss();
-
-        // Intrinsic only available on x86_64
-        #[cfg(target_arch = "x86_64")]
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cvtss_si64() {
-            let inputs = &[
-                (42.0f32, 42i64),
-                (-31.4, -31),
-                (-33.5, -34),
-                (-34.5, -34),
-                (4.0e10, 40_000_000_000),
-                (4.0e-10, 0),
-                (f32::NAN, i64::MIN),
-                (2147483500.1, 2147483520),
-                (9.223371e18, 9223370937343148032),
-            ];
-            for i in 0..inputs.len() {
-                let (xi, e) = inputs[i];
-                let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
-                let r = _mm_cvtss_si64(x);
-                assert_eq!(
-                    e, r,
-                    "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}",
-                    i, x, r, e
-                );
-            }
+    }
+    test_mm_cvtsi32_ss();
+
+    // Intrinsic only available on x86_64
+    #[cfg(target_arch = "x86_64")]
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cvtss_si64() {
+        let inputs = &[
+            (42.0f32, 42i64),
+            (-31.4, -31),
+            (-33.5, -34),
+            (-34.5, -34),
+            (4.0e10, 40_000_000_000),
+            (4.0e-10, 0),
+            (f32::NAN, i64::MIN),
+            (2147483500.1, 2147483520),
+            (9.223371e18, 9223370937343148032),
+        ];
+        for i in 0..inputs.len() {
+            let (xi, e) = inputs[i];
+            let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
+            let r = _mm_cvtss_si64(x);
+            assert_eq!(e, r, "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}", i, x, r, e);
         }
-        #[cfg(target_arch = "x86_64")]
-        test_mm_cvtss_si64();
-
-        // Intrinsic only available on x86_64
-        #[cfg(target_arch = "x86_64")]
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cvttss_si64() {
-            let inputs = &[
-                (42.0f32, 42i64),
-                (-31.4, -31),
-                (-33.5, -33),
-                (-34.5, -34),
-                (10.999, 10),
-                (-5.99, -5),
-                (4.0e10, 40_000_000_000),
-                (4.0e-10, 0),
-                (f32::NAN, i64::MIN),
-                (2147483500.1, 2147483520),
-                (9.223371e18, 9223370937343148032),
-                (9.223372e18, i64::MIN),
-            ];
-            for i in 0..inputs.len() {
-                let (xi, e) = inputs[i];
-                let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
-                let r = _mm_cvttss_si64(x);
-                assert_eq!(
-                    e, r,
-                    "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}",
-                    i, x, r, e
-                );
-            }
+    }
+    #[cfg(target_arch = "x86_64")]
+    test_mm_cvtss_si64();
+
+    // Intrinsic only available on x86_64
+    #[cfg(target_arch = "x86_64")]
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cvttss_si64() {
+        let inputs = &[
+            (42.0f32, 42i64),
+            (-31.4, -31),
+            (-33.5, -33),
+            (-34.5, -34),
+            (10.999, 10),
+            (-5.99, -5),
+            (4.0e10, 40_000_000_000),
+            (4.0e-10, 0),
+            (f32::NAN, i64::MIN),
+            (2147483500.1, 2147483520),
+            (9.223371e18, 9223370937343148032),
+            (9.223372e18, i64::MIN),
+        ];
+        for i in 0..inputs.len() {
+            let (xi, e) = inputs[i];
+            let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
+            let r = _mm_cvttss_si64(x);
+            assert_eq!(e, r, "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}", i, x, r, e);
         }
-        #[cfg(target_arch = "x86_64")]
-        test_mm_cvttss_si64();
-
-        // Intrinsic only available on x86_64
-        #[cfg(target_arch = "x86_64")]
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_cvtsi64_ss() {
-            let inputs = &[
-                (4555i64, 4555.0f32),
-                (322223333, 322223330.0),
-                (-432, -432.0),
-                (-322223333, -322223330.0),
-                (9223372036854775807, 9.223372e18),
-                (-9223372036854775808, -9.223372e18),
-            ];
-
-            for i in 0..inputs.len() {
-                let (x, f) = inputs[i];
-                let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
-                let r = _mm_cvtsi64_ss(a, x);
-                let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
-                assert_eq_m128(e, r);
-            }
+    }
+    #[cfg(target_arch = "x86_64")]
+    test_mm_cvttss_si64();
+
+    // Intrinsic only available on x86_64
+    #[cfg(target_arch = "x86_64")]
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_cvtsi64_ss() {
+        let inputs = &[
+            (4555i64, 4555.0f32),
+            (322223333, 322223330.0),
+            (-432, -432.0),
+            (-322223333, -322223330.0),
+            (9223372036854775807, 9.223372e18),
+            (-9223372036854775808, -9.223372e18),
+        ];
+
+        for i in 0..inputs.len() {
+            let (x, f) = inputs[i];
+            let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
+            let r = _mm_cvtsi64_ss(a, x);
+            let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
+            assert_eq_m128(e, r);
         }
-        #[cfg(target_arch = "x86_64")]
-        test_mm_cvtsi64_ss();
+    }
+    #[cfg(target_arch = "x86_64")]
+    test_mm_cvtsi64_ss();
 
-        #[target_feature(enable = "sse")]
-        unsafe fn test_mm_movemask_ps() {
-            let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0));
-            assert_eq!(r, 0b0101);
+    #[target_feature(enable = "sse")]
+    unsafe fn test_mm_movemask_ps() {
+        let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0));
+        assert_eq!(r, 0b0101);
 
-            let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0));
-            assert_eq!(r, 0b0111);
-        }
-        test_mm_movemask_ps();
-
-        let x = 0i8;
-        _mm_prefetch(&x, _MM_HINT_T0);
-        _mm_prefetch(&x, _MM_HINT_T1);
-        _mm_prefetch(&x, _MM_HINT_T2);
-        _mm_prefetch(&x, _MM_HINT_NTA);
-        _mm_prefetch(&x, _MM_HINT_ET0);
-        _mm_prefetch(&x, _MM_HINT_ET1);
+        let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0));
+        assert_eq!(r, 0b0111);
     }
+    test_mm_movemask_ps();
+
+    let x = 0i8;
+    _mm_prefetch(&x, _MM_HINT_T0);
+    _mm_prefetch(&x, _MM_HINT_T1);
+    _mm_prefetch(&x, _MM_HINT_T2);
+    _mm_prefetch(&x, _MM_HINT_NTA);
+    _mm_prefetch(&x, _MM_HINT_ET0);
+    _mm_prefetch(&x, _MM_HINT_ET1);
 }
diff --git a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse2.rs b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse2.rs
index e0088b9eb24..7aaf9c2624f 100644
--- a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse2.rs
+++ b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-sse2.rs
@@ -1,848 +1,843 @@
+// We're testing x86 target specific features
+//@only-target: x86_64 i686
+
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+use std::f64::NAN;
+use std::mem::transmute;
+
 fn main() {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    {
-        assert!(is_x86_feature_detected!("sse2"));
+    assert!(is_x86_feature_detected!("sse2"));
 
-        unsafe {
-            tests::test_sse2();
-        }
+    unsafe {
+        test_sse2();
     }
 }
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-mod tests {
-    #[cfg(target_arch = "x86")]
-    use std::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use std::arch::x86_64::*;
-    use std::f64::NAN;
-    use std::mem::transmute;
+#[target_feature(enable = "sse2")]
+unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i {
+    _mm_set_epi64x(b, a)
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn test_sse2() {
+    // Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse2.rs
 
-    #[target_feature(enable = "sse2")]
     unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i {
         _mm_set_epi64x(b, a)
     }
 
-    #[target_feature(enable = "sse2")]
-    pub(super) unsafe fn test_sse2() {
-        // Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse2.rs
-
-        unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i {
-            _mm_set_epi64x(b, a)
+    #[track_caller]
+    #[target_feature(enable = "sse")]
+    unsafe fn assert_eq_m128(a: __m128, b: __m128) {
+        let r = _mm_cmpeq_ps(a, b);
+        if _mm_movemask_ps(r) != 0b1111 {
+            panic!("{:?} != {:?}", a, b);
         }
+    }
 
-        #[track_caller]
-        #[target_feature(enable = "sse")]
-        unsafe fn assert_eq_m128(a: __m128, b: __m128) {
-            let r = _mm_cmpeq_ps(a, b);
-            if _mm_movemask_ps(r) != 0b1111 {
-                panic!("{:?} != {:?}", a, b);
-            }
-        }
+    #[track_caller]
+    #[target_feature(enable = "sse2")]
+    unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
+        assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
+    }
 
-        #[track_caller]
-        #[target_feature(enable = "sse2")]
-        unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
-            assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
+    #[track_caller]
+    #[target_feature(enable = "sse2")]
+    unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
+        if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
+            panic!("{:?} != {:?}", a, b);
         }
+    }
 
-        #[track_caller]
-        #[target_feature(enable = "sse2")]
-        unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
-            if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
-                panic!("{:?} != {:?}", a, b);
-            }
-        }
+    fn test_mm_pause() {
+        unsafe { _mm_pause() }
+    }
+    test_mm_pause();
 
-        fn test_mm_pause() {
-            unsafe { _mm_pause() }
-        }
-        test_mm_pause();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_avg_epu8() {
+        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
+        let r = _mm_avg_epu8(a, b);
+        assert_eq_m128i(r, _mm_set1_epi8(6));
+    }
+    test_mm_avg_epu8();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_avg_epu8() {
-            let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
-            let r = _mm_avg_epu8(a, b);
-            assert_eq_m128i(r, _mm_set1_epi8(6));
-        }
-        test_mm_avg_epu8();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_avg_epu16() {
+        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
+        let r = _mm_avg_epu16(a, b);
+        assert_eq_m128i(r, _mm_set1_epi16(6));
+    }
+    test_mm_avg_epu16();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_avg_epu16() {
-            let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
-            let r = _mm_avg_epu16(a, b);
-            assert_eq_m128i(r, _mm_set1_epi16(6));
-        }
-        test_mm_avg_epu16();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_madd_epi16() {
-            let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
-            let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
-            let r = _mm_madd_epi16(a, b);
-            let e = _mm_setr_epi32(29, 81, 149, 233);
-            assert_eq_m128i(r, e);
-
-            let a =
-                _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MIN, i16::MAX, 0, 0);
-            let b =
-                _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MAX, i16::MIN, 0, 0);
-            let r = _mm_madd_epi16(a, b);
-            let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
-            assert_eq_m128i(r, e);
-        }
-        test_mm_madd_epi16();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_madd_epi16() {
+        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
+        let r = _mm_madd_epi16(a, b);
+        let e = _mm_setr_epi32(29, 81, 149, 233);
+        assert_eq_m128i(r, e);
+
+        let a = _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MIN, i16::MAX, 0, 0);
+        let b = _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MAX, i16::MIN, 0, 0);
+        let r = _mm_madd_epi16(a, b);
+        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_madd_epi16();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_mulhi_epi16() {
-            let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
-            let r = _mm_mulhi_epi16(a, b);
-            assert_eq_m128i(r, _mm_set1_epi16(-16));
-        }
-        test_mm_mulhi_epi16();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_mulhi_epi16() {
+        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
+        let r = _mm_mulhi_epi16(a, b);
+        assert_eq_m128i(r, _mm_set1_epi16(-16));
+    }
+    test_mm_mulhi_epi16();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_mulhi_epu16() {
-            let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
-            let r = _mm_mulhi_epu16(a, b);
-            assert_eq_m128i(r, _mm_set1_epi16(15));
-        }
-        test_mm_mulhi_epu16();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_mul_epu32() {
-            let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
-            let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
-            let r = _mm_mul_epu32(a, b);
-            let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
-            assert_eq_m128i(r, e);
-        }
-        test_mm_mul_epu32();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_mulhi_epu16() {
+        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
+        let r = _mm_mulhi_epu16(a, b);
+        assert_eq_m128i(r, _mm_set1_epi16(15));
+    }
+    test_mm_mulhi_epu16();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_mul_epu32() {
+        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
+        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
+        let r = _mm_mul_epu32(a, b);
+        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_mul_epu32();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_sad_epu8() {
-            #[rustfmt::skip]
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_sad_epu8() {
+        #[rustfmt::skip]
             let a = _mm_setr_epi8(
                 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
                 1, 2, 3, 4,
                 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
                 1, 2, 3, 4,
             );
-            let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
-            let r = _mm_sad_epu8(a, b);
-            let e = _mm_setr_epi64x(1020, 614);
-            assert_eq_m128i(r, e);
-        }
-        test_mm_sad_epu8();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_sll_epi16() {
-            let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
-            let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
-            assert_eq_m128i(
-                r,
-                _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
-            );
-            let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
-            assert_eq_m128i(r, a);
-            let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
-            assert_eq_m128i(r, _mm_set1_epi16(0));
-            let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
-            assert_eq_m128i(r, _mm_set1_epi16(0));
-        }
-        test_mm_sll_epi16();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_srl_epi16() {
-            let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
-            let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
-            assert_eq_m128i(r, _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0));
-            let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
-            assert_eq_m128i(r, a);
-            let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
-            assert_eq_m128i(r, _mm_set1_epi16(0));
-            let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
-            assert_eq_m128i(r, _mm_set1_epi16(0));
-        }
-        test_mm_srl_epi16();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_sra_epi16() {
-            let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
-            let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
-            assert_eq_m128i(r, _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10));
-            let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
-            assert_eq_m128i(r, a);
-            let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
-            assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
-            let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
-            assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
-        }
-        test_mm_sra_epi16();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_sll_epi32() {
-            let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
-            let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
-            assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
-            let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
-            assert_eq_m128i(r, a);
-            let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
-            assert_eq_m128i(r, _mm_set1_epi32(0));
-            let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
-            assert_eq_m128i(r, _mm_set1_epi32(0));
-        }
-        test_mm_sll_epi32();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_srl_epi32() {
-            let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
-            let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
-            assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
-            let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
-            assert_eq_m128i(r, a);
-            let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
-            assert_eq_m128i(r, _mm_set1_epi32(0));
-            let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
-            assert_eq_m128i(r, _mm_set1_epi32(0));
-        }
-        test_mm_srl_epi32();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_sra_epi32() {
-            let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
-            let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
-            assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
-            let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
-            assert_eq_m128i(r, a);
-            let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
-            assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
-            let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
-            assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
-        }
-        test_mm_sra_epi32();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_sll_epi64() {
-            let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
-            let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
-            assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
-            let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
-            assert_eq_m128i(r, a);
-            let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
-            assert_eq_m128i(r, _mm_set1_epi64x(0));
-            let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
-            assert_eq_m128i(r, _mm_set1_epi64x(0));
-        }
-        test_mm_sll_epi64();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_srl_epi64() {
-            let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
-            let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
-            assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
-            let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
-            assert_eq_m128i(r, a);
-            let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
-            assert_eq_m128i(r, _mm_set1_epi64x(0));
-            let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
-            assert_eq_m128i(r, _mm_set1_epi64x(0));
-        }
-        test_mm_srl_epi64();
+        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
+        let r = _mm_sad_epu8(a, b);
+        let e = _mm_setr_epi64x(1020, 614);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_sad_epu8();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvtepi32_ps() {
-            let a = _mm_setr_epi32(1, 2, 3, 4);
-            let r = _mm_cvtepi32_ps(a);
-            assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
-        }
-        test_mm_cvtepi32_ps();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_sll_epi16() {
+        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
+        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
+        assert_eq_m128i(
+            r,
+            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
+        );
+        let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
+        assert_eq_m128i(r, a);
+        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
+        assert_eq_m128i(r, _mm_set1_epi16(0));
+        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
+        assert_eq_m128i(r, _mm_set1_epi16(0));
+    }
+    test_mm_sll_epi16();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvtps_epi32() {
-            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-            let r = _mm_cvtps_epi32(a);
-            assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
-        }
-        test_mm_cvtps_epi32();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_srl_epi16() {
+        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
+        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
+        assert_eq_m128i(r, _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0));
+        let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
+        assert_eq_m128i(r, a);
+        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
+        assert_eq_m128i(r, _mm_set1_epi16(0));
+        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
+        assert_eq_m128i(r, _mm_set1_epi16(0));
+    }
+    test_mm_srl_epi16();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvttps_epi32() {
-            let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
-            let r = _mm_cvttps_epi32(a);
-            assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_sra_epi16() {
+        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
+        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
+        assert_eq_m128i(r, _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10));
+        let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
+        assert_eq_m128i(r, a);
+        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
+        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
+        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
+        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
+    }
+    test_mm_sra_epi16();
 
-            let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
-            let r = _mm_cvttps_epi32(a);
-            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
-        }
-        test_mm_cvttps_epi32();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_packs_epi16() {
-            let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
-            let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
-            let r = _mm_packs_epi16(a, b);
-            assert_eq_m128i(
-                r,
-                _mm_setr_epi8(0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F),
-            );
-        }
-        test_mm_packs_epi16();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_packus_epi16() {
-            let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
-            let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
-            let r = _mm_packus_epi16(a, b);
-            assert_eq_m128i(r, _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0));
-        }
-        test_mm_packus_epi16();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_packs_epi32() {
-            let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
-            let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
-            let r = _mm_packs_epi32(a, b);
-            assert_eq_m128i(r, _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF));
-        }
-        test_mm_packs_epi32();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_min_sd() {
-            let a = _mm_setr_pd(1.0, 2.0);
-            let b = _mm_setr_pd(5.0, 10.0);
-            let r = _mm_min_sd(a, b);
-            assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
-        }
-        test_mm_min_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_min_pd() {
-            let a = _mm_setr_pd(-1.0, 5.0);
-            let b = _mm_setr_pd(-100.0, 20.0);
-            let r = _mm_min_pd(a, b);
-            assert_eq_m128d(r, _mm_setr_pd(-100.0, 5.0));
-
-            // `_mm_min_pd` can **not** be implemented using the `simd_min` rust intrinsic because
-            // the semantics of `simd_min` are different to those of `_mm_min_pd` regarding handling
-            // of `-0.0`.
-            let a = _mm_setr_pd(-0.0, 0.0);
-            let b = _mm_setr_pd(0.0, 0.0);
-            let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
-            let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
-            let a: [u8; 16] = transmute(a);
-            let b: [u8; 16] = transmute(b);
-            assert_eq!(r1, b);
-            assert_eq!(r2, a);
-            assert_ne!(a, b); // sanity check that -0.0 is actually present
-        }
-        test_mm_min_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_max_sd() {
-            let a = _mm_setr_pd(1.0, 2.0);
-            let b = _mm_setr_pd(5.0, 10.0);
-            let r = _mm_max_sd(a, b);
-            assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
-        }
-        test_mm_max_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_max_pd() {
-            let a = _mm_setr_pd(-1.0, 5.0);
-            let b = _mm_setr_pd(-100.0, 20.0);
-            let r = _mm_max_pd(a, b);
-            assert_eq_m128d(r, _mm_setr_pd(-1.0, 20.0));
-
-            // `_mm_max_pd` can **not** be implemented using the `simd_max` rust intrinsic because
-            // the semantics of `simd_max` are different to those of `_mm_max_pd` regarding handling
-            // of `-0.0`.
-            let a = _mm_setr_pd(-0.0, 0.0);
-            let b = _mm_setr_pd(0.0, 0.0);
-            let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
-            let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
-            let a: [u8; 16] = transmute(a);
-            let b: [u8; 16] = transmute(b);
-            assert_eq!(r1, b);
-            assert_eq!(r2, a);
-            assert_ne!(a, b); // sanity check that -0.0 is actually present
-        }
-        test_mm_max_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_sqrt_sd() {
-            let a = _mm_setr_pd(1.0, 2.0);
-            let b = _mm_setr_pd(5.0, 10.0);
-            let r = _mm_sqrt_sd(a, b);
-            assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
-        }
-        test_mm_sqrt_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_sll_epi32() {
+        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
+        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
+        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
+        let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
+        assert_eq_m128i(r, a);
+        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
+        assert_eq_m128i(r, _mm_set1_epi32(0));
+        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
+        assert_eq_m128i(r, _mm_set1_epi32(0));
+    }
+    test_mm_sll_epi32();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_sqrt_pd() {
-            let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
-            assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
-        }
-        test_mm_sqrt_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpeq_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpeq_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmplt_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
-            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmplt_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmple_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmple_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpgt_sd() {
-            let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpgt_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpge_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpge_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpord_sd() {
-            let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
-            let e = _mm_setr_epi64x(0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpord_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpunord_sd() {
-            let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
-            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpunord_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpneq_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
-            let e = _mm_setr_epi64x(!0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpneq_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpnlt_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
-            let e = _mm_setr_epi64x(0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpnlt_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpnle_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpnle_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpngt_sd() {
-            let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpngt_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpnge_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(0, transmute(2.0f64));
-            let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpnge_sd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpeq_pd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(!0, 0);
-            let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpeq_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmplt_pd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(0, !0);
-            let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmplt_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmple_pd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(!0, !0);
-            let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmple_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpgt_pd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(0, 0);
-            let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpgt_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpge_pd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(!0, 0);
-            let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpge_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpord_pd() {
-            let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
-            let e = _mm_setr_epi64x(0, !0);
-            let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpord_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpunord_pd() {
-            let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
-            let e = _mm_setr_epi64x(!0, 0);
-            let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpunord_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpneq_pd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
-            let e = _mm_setr_epi64x(!0, !0);
-            let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpneq_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpnlt_pd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
-            let e = _mm_setr_epi64x(0, 0);
-            let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpnlt_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpnle_pd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(0, 0);
-            let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpnle_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpngt_pd() {
-            let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(0, !0);
-            let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpngt_pd();
-
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cmpnge_pd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            let e = _mm_setr_epi64x(0, !0);
-            let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
-            assert_eq_m128i(r, e);
-        }
-        test_mm_cmpnge_pd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_srl_epi32() {
+        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
+        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
+        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
+        let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
+        assert_eq_m128i(r, a);
+        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
+        assert_eq_m128i(r, _mm_set1_epi32(0));
+        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
+        assert_eq_m128i(r, _mm_set1_epi32(0));
+    }
+    test_mm_srl_epi32();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_comieq_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_comieq_sd(a, b) != 0);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_sra_epi32() {
+        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
+        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
+        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
+        let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
+        assert_eq_m128i(r, a);
+        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
+        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
+        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
+        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
+    }
+    test_mm_sra_epi32();
 
-            let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_comieq_sd(a, b) == 0);
-        }
-        test_mm_comieq_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_sll_epi64() {
+        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
+        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
+        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
+        let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
+        assert_eq_m128i(r, a);
+        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
+        assert_eq_m128i(r, _mm_set1_epi64x(0));
+        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
+        assert_eq_m128i(r, _mm_set1_epi64x(0));
+    }
+    test_mm_sll_epi64();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_comilt_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_comilt_sd(a, b) == 0);
-        }
-        test_mm_comilt_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_srl_epi64() {
+        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
+        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
+        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
+        let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
+        assert_eq_m128i(r, a);
+        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
+        assert_eq_m128i(r, _mm_set1_epi64x(0));
+        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
+        assert_eq_m128i(r, _mm_set1_epi64x(0));
+    }
+    test_mm_srl_epi64();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_comile_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_comile_sd(a, b) != 0);
-        }
-        test_mm_comile_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvtepi32_ps() {
+        let a = _mm_setr_epi32(1, 2, 3, 4);
+        let r = _mm_cvtepi32_ps(a);
+        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
+    }
+    test_mm_cvtepi32_ps();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_comigt_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_comigt_sd(a, b) == 0);
-        }
-        test_mm_comigt_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvtps_epi32() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let r = _mm_cvtps_epi32(a);
+        assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
+    }
+    test_mm_cvtps_epi32();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_comige_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_comige_sd(a, b) != 0);
-        }
-        test_mm_comige_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvttps_epi32() {
+        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
+        let r = _mm_cvttps_epi32(a);
+        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
+
+        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
+        let r = _mm_cvttps_epi32(a);
+        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
+    }
+    test_mm_cvttps_epi32();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_comineq_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_comineq_sd(a, b) == 0);
-        }
-        test_mm_comineq_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_packs_epi16() {
+        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
+        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
+        let r = _mm_packs_epi16(a, b);
+        assert_eq_m128i(
+            r,
+            _mm_setr_epi8(0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F),
+        );
+    }
+    test_mm_packs_epi16();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_ucomieq_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_ucomieq_sd(a, b) != 0);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_packus_epi16() {
+        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
+        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
+        let r = _mm_packus_epi16(a, b);
+        assert_eq_m128i(r, _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0));
+    }
+    test_mm_packus_epi16();
 
-            let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
-            assert!(_mm_ucomieq_sd(a, b) == 0);
-        }
-        test_mm_ucomieq_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_packs_epi32() {
+        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
+        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
+        let r = _mm_packs_epi32(a, b);
+        assert_eq_m128i(r, _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF));
+    }
+    test_mm_packs_epi32();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_ucomilt_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_ucomilt_sd(a, b) == 0);
-        }
-        test_mm_ucomilt_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_min_sd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_min_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
+    }
+    test_mm_min_sd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_ucomile_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_ucomile_sd(a, b) != 0);
-        }
-        test_mm_ucomile_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_min_pd() {
+        let a = _mm_setr_pd(-1.0, 5.0);
+        let b = _mm_setr_pd(-100.0, 20.0);
+        let r = _mm_min_pd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(-100.0, 5.0));
+
+        // `_mm_min_pd` can **not** be implemented using the `simd_min` rust intrinsic because
+        // the semantics of `simd_min` are different to those of `_mm_min_pd` regarding handling
+        // of `-0.0`.
+        let a = _mm_setr_pd(-0.0, 0.0);
+        let b = _mm_setr_pd(0.0, 0.0);
+        let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
+        let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
+        let a: [u8; 16] = transmute(a);
+        let b: [u8; 16] = transmute(b);
+        assert_eq!(r1, b);
+        assert_eq!(r2, a);
+        assert_ne!(a, b); // sanity check that -0.0 is actually present
+    }
+    test_mm_min_pd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_ucomigt_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_ucomigt_sd(a, b) == 0);
-        }
-        test_mm_ucomigt_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_max_sd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_max_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
+    }
+    test_mm_max_sd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_ucomige_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_ucomige_sd(a, b) != 0);
-        }
-        test_mm_ucomige_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_max_pd() {
+        let a = _mm_setr_pd(-1.0, 5.0);
+        let b = _mm_setr_pd(-100.0, 20.0);
+        let r = _mm_max_pd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(-1.0, 20.0));
+
+        // `_mm_max_pd` can **not** be implemented using the `simd_max` rust intrinsic because
+        // the semantics of `simd_max` are different to those of `_mm_max_pd` regarding handling
+        // of `-0.0`.
+        let a = _mm_setr_pd(-0.0, 0.0);
+        let b = _mm_setr_pd(0.0, 0.0);
+        let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
+        let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
+        let a: [u8; 16] = transmute(a);
+        let b: [u8; 16] = transmute(b);
+        assert_eq!(r1, b);
+        assert_eq!(r2, a);
+        assert_ne!(a, b); // sanity check that -0.0 is actually present
+    }
+    test_mm_max_pd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_ucomineq_sd() {
-            let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
-            assert!(_mm_ucomineq_sd(a, b) == 0);
-        }
-        test_mm_ucomineq_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_sqrt_sd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_sqrt_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
+    }
+    test_mm_sqrt_sd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvtpd_ps() {
-            let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
-            assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_sqrt_pd() {
+        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
+        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
+    }
+    test_mm_sqrt_pd();
 
-            let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
-            assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpeq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpeq_sd();
 
-            let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
-            assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmplt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmplt_sd();
 
-            let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
-            assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
-        }
-        test_mm_cvtpd_ps();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmple_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmple_sd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvtps_pd() {
-            let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
-            assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpgt_sd() {
+        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpgt_sd();
 
-            let r = _mm_cvtps_pd(_mm_setr_ps(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN));
-            assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
-        }
-        test_mm_cvtps_pd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpge_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpge_sd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvtpd_epi32() {
-            let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
-            assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpord_sd() {
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpord_sd();
 
-            let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
-            assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpunord_sd() {
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpunord_sd();
 
-            let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
-            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpneq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpneq_sd();
 
-            let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
-            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpnlt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpnlt_sd();
 
-            let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
-            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
-        }
-        test_mm_cvtpd_epi32();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpnle_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpnle_sd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvttpd_epi32() {
-            let a = _mm_setr_pd(-1.1, 2.2);
-            let r = _mm_cvttpd_epi32(a);
-            assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpngt_sd() {
+        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpngt_sd();
 
-            let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
-            let r = _mm_cvttpd_epi32(a);
-            assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
-        }
-        test_mm_cvttpd_epi32();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpnge_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpnge_sd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvtsd_si32() {
-            let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
-            assert_eq!(r, -2);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpeq_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpeq_pd();
 
-            let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
-            assert_eq!(r, i32::MIN);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmplt_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmplt_pd();
 
-            let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
-            assert_eq!(r, i32::MIN);
-        }
-        test_mm_cvtsd_si32();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmple_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmple_pd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvttsd_si32() {
-            let a = _mm_setr_pd(-1.1, 2.2);
-            let r = _mm_cvttsd_si32(a);
-            assert_eq!(r, -1);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpgt_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpgt_pd();
 
-            let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
-            let r = _mm_cvttsd_si32(a);
-            assert_eq!(r, i32::MIN);
-        }
-        test_mm_cvttsd_si32();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpge_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpge_pd();
 
-        // Intrinsic only available on x86_64
-        #[cfg(target_arch = "x86_64")]
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvtsd_si64() {
-            let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0));
-            assert_eq!(r, -2_i64);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpord_pd() {
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpord_pd();
 
-            let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN));
-            assert_eq!(r, i64::MIN);
-        }
-        #[cfg(target_arch = "x86_64")]
-        test_mm_cvtsd_si64();
-
-        // Intrinsic only available on x86_64
-        #[cfg(target_arch = "x86_64")]
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvttsd_si64() {
-            let a = _mm_setr_pd(-1.1, 2.2);
-            let r = _mm_cvttsd_si64(a);
-            assert_eq!(r, -1_i64);
-        }
-        #[cfg(target_arch = "x86_64")]
-        test_mm_cvttsd_si64();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpunord_pd() {
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpunord_pd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvtsd_ss() {
-            let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
-            let b = _mm_setr_pd(2.0, -5.0);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpneq_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpneq_pd();
 
-            let r = _mm_cvtsd_ss(a, b);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpnlt_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpnlt_pd();
 
-            assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpnle_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpnle_pd();
 
-            let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
-            let b = _mm_setr_pd(f64::INFINITY, -5.0);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpngt_pd() {
+        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpngt_pd();
 
-            let r = _mm_cvtsd_ss(a, b);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cmpnge_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
+        assert_eq_m128i(r, e);
+    }
+    test_mm_cmpnge_pd();
 
-            assert_eq_m128(
-                r,
-                _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY),
-            );
-        }
-        test_mm_cvtsd_ss();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_comieq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comieq_sd(a, b) != 0);
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_cvtss_sd() {
-            let a = _mm_setr_pd(-1.1, 2.2);
-            let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comieq_sd(a, b) == 0);
+    }
+    test_mm_comieq_sd();
 
-            let r = _mm_cvtss_sd(a, b);
-            assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_comilt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comilt_sd(a, b) == 0);
+    }
+    test_mm_comilt_sd();
 
-            let a = _mm_setr_pd(-1.1, f64::INFINITY);
-            let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_comile_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comile_sd(a, b) != 0);
+    }
+    test_mm_comile_sd();
 
-            let r = _mm_cvtss_sd(a, b);
-            assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
-        }
-        test_mm_cvtss_sd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_comigt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comigt_sd(a, b) == 0);
+    }
+    test_mm_comigt_sd();
 
-        #[target_feature(enable = "sse2")]
-        unsafe fn test_mm_movemask_pd() {
-            let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
-            assert_eq!(r, 0b01);
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_comige_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comige_sd(a, b) != 0);
+    }
+    test_mm_comige_sd();
 
-            let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
-            assert_eq!(r, 0b11);
-        }
-        test_mm_movemask_pd();
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_comineq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comineq_sd(a, b) == 0);
+    }
+    test_mm_comineq_sd();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_ucomieq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_ucomieq_sd(a, b) != 0);
+
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
+        assert!(_mm_ucomieq_sd(a, b) == 0);
+    }
+    test_mm_ucomieq_sd();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_ucomilt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_ucomilt_sd(a, b) == 0);
+    }
+    test_mm_ucomilt_sd();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_ucomile_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_ucomile_sd(a, b) != 0);
+    }
+    test_mm_ucomile_sd();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_ucomigt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_ucomigt_sd(a, b) == 0);
+    }
+    test_mm_ucomigt_sd();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_ucomige_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_ucomige_sd(a, b) != 0);
+    }
+    test_mm_ucomige_sd();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_ucomineq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_ucomineq_sd(a, b) == 0);
+    }
+    test_mm_ucomineq_sd();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvtpd_ps() {
+        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
+        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
+
+        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
+        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
+
+        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
+        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
+
+        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
+        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
+    }
+    test_mm_cvtpd_ps();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvtps_pd() {
+        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
+        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
+
+        let r = _mm_cvtps_pd(_mm_setr_ps(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN));
+        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
+    }
+    test_mm_cvtps_pd();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvtpd_epi32() {
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
+        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
+
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
+        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
+
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
+        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
+
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
+        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
+
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
+        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
+    }
+    test_mm_cvtpd_epi32();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvttpd_epi32() {
+        let a = _mm_setr_pd(-1.1, 2.2);
+        let r = _mm_cvttpd_epi32(a);
+        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
+
+        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
+        let r = _mm_cvttpd_epi32(a);
+        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
+    }
+    test_mm_cvttpd_epi32();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvtsd_si32() {
+        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
+        assert_eq!(r, -2);
+
+        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
+        assert_eq!(r, i32::MIN);
+
+        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
+        assert_eq!(r, i32::MIN);
+    }
+    test_mm_cvtsd_si32();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvttsd_si32() {
+        let a = _mm_setr_pd(-1.1, 2.2);
+        let r = _mm_cvttsd_si32(a);
+        assert_eq!(r, -1);
+
+        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
+        let r = _mm_cvttsd_si32(a);
+        assert_eq!(r, i32::MIN);
+    }
+    test_mm_cvttsd_si32();
+
+    // Intrinsic only available on x86_64
+    #[cfg(target_arch = "x86_64")]
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvtsd_si64() {
+        let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0));
+        assert_eq!(r, -2_i64);
+
+        let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN));
+        assert_eq!(r, i64::MIN);
+    }
+    #[cfg(target_arch = "x86_64")]
+    test_mm_cvtsd_si64();
+
+    // Intrinsic only available on x86_64
+    #[cfg(target_arch = "x86_64")]
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvttsd_si64() {
+        let a = _mm_setr_pd(-1.1, 2.2);
+        let r = _mm_cvttsd_si64(a);
+        assert_eq!(r, -1_i64);
+    }
+    #[cfg(target_arch = "x86_64")]
+    test_mm_cvttsd_si64();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvtsd_ss() {
+        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
+        let b = _mm_setr_pd(2.0, -5.0);
+
+        let r = _mm_cvtsd_ss(a, b);
+
+        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
+
+        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
+        let b = _mm_setr_pd(f64::INFINITY, -5.0);
+
+        let r = _mm_cvtsd_ss(a, b);
+
+        assert_eq_m128(
+            r,
+            _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY),
+        );
+    }
+    test_mm_cvtsd_ss();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_cvtss_sd() {
+        let a = _mm_setr_pd(-1.1, 2.2);
+        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+
+        let r = _mm_cvtss_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
+
+        let a = _mm_setr_pd(-1.1, f64::INFINITY);
+        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
+
+        let r = _mm_cvtss_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
+    }
+    test_mm_cvtss_sd();
+
+    #[target_feature(enable = "sse2")]
+    unsafe fn test_mm_movemask_pd() {
+        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
+        assert_eq!(r, 0b01);
+
+        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
+        assert_eq!(r, 0b11);
     }
+    test_mm_movemask_pd();
 }