about summary refs log tree commit diff
path: root/library/stdarch/crates
diff options
context:
space:
mode:
author Amanieu d'Antras <amanieu@gmail.com> 2021-05-20 01:47:38 +0100
committer GitHub <noreply@github.com> 2021-05-20 01:47:38 +0100
commit b216e9f9c4b0c7c7298ef386f5463759f690ff89 (patch)
tree 05f75c7f4c2e0d87dba010807bbf261521bdcb11 /library/stdarch/crates
parent 15749b0ed353ee5c9218ca997cea53286d168bc9 (diff)
download rust-b216e9f9c4b0c7c7298ef386f5463759f690ff89.tar.gz
download rust-b216e9f9c4b0c7c7298ef386f5463759f690ff89.zip
Fix x86 SIMD byte shift intrinsics (#1168)
Diffstat (limited to 'library/stdarch/crates')
-rw-r--r-- library/stdarch/crates/core_arch/src/x86/avx2.rs 72
-rw-r--r-- library/stdarch/crates/core_arch/src/x86/avx512bw.rs 136
-rw-r--r-- library/stdarch/crates/core_arch/src/x86/sse2.rs 5
3 files changed, 115 insertions, 98 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs
index e0c7fcfab69..081609ecea7 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs
@@ -2585,44 +2585,52 @@ pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_imm8!(IMM8);
+    const fn mask(shift: i32, i: u32) -> u32 {
+        let shift = shift as u32 & 0xff;
+        if shift > 15 || i % 16 < shift {
+            0
+        } else {
+            32 + (i - shift)
+        }
+    }
     let a = a.as_i8x32();
     let zero = _mm256_setzero_si256().as_i8x32();
     let r: i8x32 = simd_shuffle32!(
         zero,
         a,
         <const IMM8: i32> [
-            32 - (IMM8 as u32 & 0xff),
-            33 - (IMM8 as u32 & 0xff),
-            34 - (IMM8 as u32 & 0xff),
-            35 - (IMM8 as u32 & 0xff),
-            36 - (IMM8 as u32 & 0xff),
-            37 - (IMM8 as u32 & 0xff),
-            38 - (IMM8 as u32 & 0xff),
-            39 - (IMM8 as u32 & 0xff),
-            40 - (IMM8 as u32 & 0xff),
-            41 - (IMM8 as u32 & 0xff),
-            42 - (IMM8 as u32 & 0xff),
-            43 - (IMM8 as u32 & 0xff),
-            44 - (IMM8 as u32 & 0xff),
-            45 - (IMM8 as u32 & 0xff),
-            46 - (IMM8 as u32 & 0xff),
-            47 - (IMM8 as u32 & 0xff),
-            48 - (IMM8 as u32 & 0xff) - 16,
-            49 - (IMM8 as u32 & 0xff) - 16,
-            50 - (IMM8 as u32 & 0xff) - 16,
-            51 - (IMM8 as u32 & 0xff) - 16,
-            52 - (IMM8 as u32 & 0xff) - 16,
-            53 - (IMM8 as u32 & 0xff) - 16,
-            54 - (IMM8 as u32 & 0xff) - 16,
-            55 - (IMM8 as u32 & 0xff) - 16,
-            56 - (IMM8 as u32 & 0xff) - 16,
-            57 - (IMM8 as u32 & 0xff) - 16,
-            58 - (IMM8 as u32 & 0xff) - 16,
-            59 - (IMM8 as u32 & 0xff) - 16,
-            60 - (IMM8 as u32 & 0xff) - 16,
-            61 - (IMM8 as u32 & 0xff) - 16,
-            62 - (IMM8 as u32 & 0xff) - 16,
-            63 - (IMM8 as u32 & 0xff) - 16,
+            mask(IMM8, 0),
+            mask(IMM8, 1),
+            mask(IMM8, 2),
+            mask(IMM8, 3),
+            mask(IMM8, 4),
+            mask(IMM8, 5),
+            mask(IMM8, 6),
+            mask(IMM8, 7),
+            mask(IMM8, 8),
+            mask(IMM8, 9),
+            mask(IMM8, 10),
+            mask(IMM8, 11),
+            mask(IMM8, 12),
+            mask(IMM8, 13),
+            mask(IMM8, 14),
+            mask(IMM8, 15),
+            mask(IMM8, 16),
+            mask(IMM8, 17),
+            mask(IMM8, 18),
+            mask(IMM8, 19),
+            mask(IMM8, 20),
+            mask(IMM8, 21),
+            mask(IMM8, 22),
+            mask(IMM8, 23),
+            mask(IMM8, 24),
+            mask(IMM8, 25),
+            mask(IMM8, 26),
+            mask(IMM8, 27),
+            mask(IMM8, 28),
+            mask(IMM8, 29),
+            mask(IMM8, 30),
+            mask(IMM8, 31),
         ],
     );
     transmute(r)
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
index 8c2c9a2058e..4834f19edd0 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
@@ -8873,76 +8873,84 @@ pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
 #[rustc_legacy_const_generics(1)]
 pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
     static_assert_imm8!(IMM8);
+    const fn mask(shift: i32, i: u32) -> u32 {
+        let shift = shift as u32 & 0xff;
+        if shift > 15 || i % 16 < shift {
+            0
+        } else {
+            64 + (i - shift)
+        }
+    }
     let a = a.as_i8x64();
     let zero = _mm512_setzero_si512().as_i8x64();
     let r: i8x64 = simd_shuffle64!(
         zero,
         a,
         <const IMM8: i32> [
-            64 - (IMM8 as u32 & 0xff),
-            65 - (IMM8 as u32 & 0xff),
-            66 - (IMM8 as u32 & 0xff),
-            67 - (IMM8 as u32 & 0xff),
-            68 - (IMM8 as u32 & 0xff),
-            69 - (IMM8 as u32 & 0xff),
-            70 - (IMM8 as u32 & 0xff),
-            71 - (IMM8 as u32 & 0xff),
-            72 - (IMM8 as u32 & 0xff),
-            73 - (IMM8 as u32 & 0xff),
-            74 - (IMM8 as u32 & 0xff),
-            75 - (IMM8 as u32 & 0xff),
-            76 - (IMM8 as u32 & 0xff),
-            77 - (IMM8 as u32 & 0xff),
-            78 - (IMM8 as u32 & 0xff),
-            79 - (IMM8 as u32 & 0xff),
-            80 - (IMM8 as u32 & 0xff) - 16,
-            81 - (IMM8 as u32 & 0xff) - 16,
-            82 - (IMM8 as u32 & 0xff) - 16,
-            83 - (IMM8 as u32 & 0xff) - 16,
-            84 - (IMM8 as u32 & 0xff) - 16,
-            85 - (IMM8 as u32 & 0xff) - 16,
-            86 - (IMM8 as u32 & 0xff) - 16,
-            87 - (IMM8 as u32 & 0xff) - 16,
-            88 - (IMM8 as u32 & 0xff) - 16,
-            89 - (IMM8 as u32 & 0xff) - 16,
-            90 - (IMM8 as u32 & 0xff) - 16,
-            91 - (IMM8 as u32 & 0xff) - 16,
-            92 - (IMM8 as u32 & 0xff) - 16,
-            93 - (IMM8 as u32 & 0xff) - 16,
-            94 - (IMM8 as u32 & 0xff) - 16,
-            95 - (IMM8 as u32 & 0xff) - 16,
-            96 - (IMM8 as u32 & 0xff) - 32,
-            97 - (IMM8 as u32 & 0xff) - 32,
-            98 - (IMM8 as u32 & 0xff) - 32,
-            99 - (IMM8 as u32 & 0xff) - 32,
-            100 - (IMM8 as u32 & 0xff) - 32,
-            101 - (IMM8 as u32 & 0xff) - 32,
-            102 - (IMM8 as u32 & 0xff) - 32,
-            103 - (IMM8 as u32 & 0xff) - 32,
-            104 - (IMM8 as u32 & 0xff) - 32,
-            105 - (IMM8 as u32 & 0xff) - 32,
-            106 - (IMM8 as u32 & 0xff) - 32,
-            107 - (IMM8 as u32 & 0xff) - 32,
-            108 - (IMM8 as u32 & 0xff) - 32,
-            109 - (IMM8 as u32 & 0xff) - 32,
-            110 - (IMM8 as u32 & 0xff) - 32,
-            111 - (IMM8 as u32 & 0xff) - 32,
-            112 - (IMM8 as u32 & 0xff) - 48,
-            113 - (IMM8 as u32 & 0xff) - 48,
-            114 - (IMM8 as u32 & 0xff) - 48,
-            115 - (IMM8 as u32 & 0xff) - 48,
-            116 - (IMM8 as u32 & 0xff) - 48,
-            117 - (IMM8 as u32 & 0xff) - 48,
-            118 - (IMM8 as u32 & 0xff) - 48,
-            119 - (IMM8 as u32 & 0xff) - 48,
-            120 - (IMM8 as u32 & 0xff) - 48,
-            121 - (IMM8 as u32 & 0xff) - 48,
-            122 - (IMM8 as u32 & 0xff) - 48,
-            123 - (IMM8 as u32 & 0xff) - 48,
-            124 - (IMM8 as u32 & 0xff) - 48,
-            125 - (IMM8 as u32 & 0xff) - 48,
-            126 - (IMM8 as u32 & 0xff) - 48,
-            127 - (IMM8 as u32 & 0xff) - 48,
+            mask(IMM8, 0),
+            mask(IMM8, 1),
+            mask(IMM8, 2),
+            mask(IMM8, 3),
+            mask(IMM8, 4),
+            mask(IMM8, 5),
+            mask(IMM8, 6),
+            mask(IMM8, 7),
+            mask(IMM8, 8),
+            mask(IMM8, 9),
+            mask(IMM8, 10),
+            mask(IMM8, 11),
+            mask(IMM8, 12),
+            mask(IMM8, 13),
+            mask(IMM8, 14),
+            mask(IMM8, 15),
+            mask(IMM8, 16),
+            mask(IMM8, 17),
+            mask(IMM8, 18),
+            mask(IMM8, 19),
+            mask(IMM8, 20),
+            mask(IMM8, 21),
+            mask(IMM8, 22),
+            mask(IMM8, 23),
+            mask(IMM8, 24),
+            mask(IMM8, 25),
+            mask(IMM8, 26),
+            mask(IMM8, 27),
+            mask(IMM8, 28),
+            mask(IMM8, 29),
+            mask(IMM8, 30),
+            mask(IMM8, 31),
+            mask(IMM8, 32),
+            mask(IMM8, 33),
+            mask(IMM8, 34),
+            mask(IMM8, 35),
+            mask(IMM8, 36),
+            mask(IMM8, 37),
+            mask(IMM8, 38),
+            mask(IMM8, 39),
+            mask(IMM8, 40),
+            mask(IMM8, 41),
+            mask(IMM8, 42),
+            mask(IMM8, 43),
+            mask(IMM8, 44),
+            mask(IMM8, 45),
+            mask(IMM8, 46),
+            mask(IMM8, 47),
+            mask(IMM8, 48),
+            mask(IMM8, 49),
+            mask(IMM8, 50),
+            mask(IMM8, 51),
+            mask(IMM8, 52),
+            mask(IMM8, 53),
+            mask(IMM8, 54),
+            mask(IMM8, 55),
+            mask(IMM8, 56),
+            mask(IMM8, 57),
+            mask(IMM8, 58),
+            mask(IMM8, 59),
+            mask(IMM8, 60),
+            mask(IMM8, 61),
+            mask(IMM8, 62),
+            mask(IMM8, 63),
         ],
     );
     transmute(r)
diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs
index 2f3e719668a..307dae77d27 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs
@@ -425,10 +425,11 @@ pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
     const fn mask(shift: i32, i: u32) -> u32 {
-        if (shift as u32) > 15 {
+        let shift = shift as u32 & 0xff;
+        if shift > 15 {
             i
         } else {
-            16 - (shift as u32) + i
+            16 - shift + i
         }
     }
     let zero = _mm_set1_epi8(0).as_i8x16();