about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2023-09-29 15:26:51 +0000
committer bors <bors@rust-lang.org> 2023-09-29 15:26:51 +0000
commit a2395767f46a421f5254095f4d3950729cd3cd16 (patch)
tree 4d0943f90f62d1926db042bd08c85b068d62e6ac
parent 7619edbe7a1a100851ce0b573a589709297c7b23 (diff)
parent 0c0c088e57c2dbb7a4145bf2bee1ca1cc137bf50 (diff)
download rust-a2395767f46a421f5254095f4d3950729cd3cd16.tar.gz
download rust-a2395767f46a421f5254095f4d3950729cd3cd16.zip
Auto merge of #3093 - eduardosm:llvm.x86.sse2.pmadd.wd, r=RalfJung
Implement the `llvm.x86.sse2.pmadd.wd` intrinsic
-rw-r--r-- src/tools/miri/src/shims/x86/sse2.rs 36
-rw-r--r-- src/tools/miri/tests/pass/intrinsics-x86-sse2.rs 18
2 files changed, 54 insertions, 0 deletions
diff --git a/src/tools/miri/src/shims/x86/sse2.rs b/src/tools/miri/src/shims/x86/sse2.rs
index 2ca882167bf..2ef6a9b59ed 100644
--- a/src/tools/miri/src/shims/x86/sse2.rs
+++ b/src/tools/miri/src/shims/x86/sse2.rs
@@ -82,6 +82,42 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     this.write_immediate(*res, &dest)?;
                 }
             }
+            // Used to implement the _mm_madd_epi16 function.
+            // Multiplies packed signed 16-bit integers in `left` and `right`, producing
+            // intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
+            // intermediate 32-bit integers, and packs the results in `dest`.
+            "pmadd.wd" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(left_len, right_len);
+                assert_eq!(dest_len.checked_mul(2).unwrap(), left_len);
+
+                for i in 0..dest_len {
+                    let j1 = i.checked_mul(2).unwrap();
+                    let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
+                    let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
+
+                    let j2 = j1.checked_add(1).unwrap();
+                    let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
+                    let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
+
+                    let dest = this.project_index(&dest, i)?;
+
+                    // Multiplications are i16*i16->i32, which will not overflow.
+                    let mul1 = i32::from(left1).checked_mul(right1.into()).unwrap();
+                    let mul2 = i32::from(left2).checked_mul(right2.into()).unwrap();
+                    // However, this addition can overflow in the most extreme case
+                    // (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
+                    let res = mul1.wrapping_add(mul2);
+
+                    this.write_scalar(Scalar::from_i32(res), &dest)?;
+                }
+            }
             // Used to implement the _mm_mulhi_epi16 and _mm_mulhi_epu16 functions.
             "pmulh.w" | "pmulhu.w" => {
                 let [left, right] =
diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
index fa9df04d368..2c7665bc736 100644
--- a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
+++ b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
@@ -71,6 +71,24 @@ mod tests {
         test_mm_avg_epu16();
 
         #[target_feature(enable = "sse2")]
+        unsafe fn test_mm_madd_epi16() { // exercises _mm_madd_epi16 on small and extreme inputs
+            let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+            let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
+            let r = _mm_madd_epi16(a, b);
+            let e = _mm_setr_epi32(29, 81, 149, 233); // 1*9+2*10, 3*11+4*12, 5*13+6*14, 7*15+8*16
+            assert_eq_m128i(r, e);
+
+            let a =
+                _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MIN, i16::MAX, 0, 0);
+            let b =
+                _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MAX, i16::MIN, 0, 0);
+            let r = _mm_madd_epi16(a, b); // pairs: MAX*MAX twice, MIN*MIN twice, MIN*MAX + MAX*MIN, zeros
+            let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0); // MIN*MIN twice sums to 0x80000000, expected as i32::MIN
+            assert_eq_m128i(r, e);
+        }
+        test_mm_madd_epi16();
+
+        #[target_feature(enable = "sse2")]
         unsafe fn test_mm_mulhi_epi16() {
             let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
             let r = _mm_mulhi_epi16(a, b);