about summary refs log tree commit diff
path: root/compiler/rustc_codegen_gcc
diff options
context:
space:
mode:
authorOli Scherer <git-spam-no-reply9815368754983@oli-obk.de>2023-07-10 13:03:48 +0000
committerOli Scherer <git-spam-no-reply9815368754983@oli-obk.de>2023-08-03 09:29:00 +0000
commit4457ef2c6db2aa65aeaba084d2d85f3355595f5a (patch)
treeeff5cae1af3af7c260958a78dc1e60c949b1e060 /compiler/rustc_codegen_gcc
parent2e6ac7fe5bb96bcc2d8c0b3e20db7cf7d2baa241 (diff)
downloadrust-4457ef2c6db2aa65aeaba084d2d85f3355595f5a.tar.gz
rust-4457ef2c6db2aa65aeaba084d2d85f3355595f5a.zip
Forbid old-style `simd_shuffleN` intrinsics
Diffstat (limited to 'compiler/rustc_codegen_gcc')
-rw-r--r--compiler/rustc_codegen_gcc/src/intrinsic/simd.rs445
1 files changed, 250 insertions, 195 deletions
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
index f27de867d36..85d3e7234a0 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -1,11 +1,11 @@
-#[cfg(feature="master")]
-use gccjit::{ComparisonOp, UnaryOp};
 use gccjit::ToRValue;
 use gccjit::{BinaryOp, RValue, Type};
+#[cfg(feature = "master")]
+use gccjit::{ComparisonOp, UnaryOp};
 
 use rustc_codegen_ssa::base::compare_simd_types;
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use rustc_codegen_ssa::errors::ExpectedPointerMutability;
 use rustc_codegen_ssa::errors::InvalidMonomorphization;
 use rustc_codegen_ssa::mir::operand::OperandRef;
@@ -19,7 +19,7 @@ use rustc_span::{sym, Span, Symbol};
 use rustc_target::abi::Align;
 
 use crate::builder::Builder;
-#[cfg(feature="master")]
+#[cfg(feature = "master")]
 use crate::context::CodegenCx;
 
 pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
@@ -57,7 +57,10 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     let arg_tys = sig.inputs();
 
     if name == sym::simd_select_bitmask {
-        require_simd!(arg_tys[1], InvalidMonomorphization::SimdArgument { span, name, ty: arg_tys[1] });
+        require_simd!(
+            arg_tys[1],
+            InvalidMonomorphization::SimdArgument { span, name, ty: arg_tys[1] }
+        );
         let (len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
 
         let expected_int_bits = (len.max(8) - 1).next_power_of_two();
@@ -95,7 +98,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         // NOTE: since the arguments can be vectors of floats, make sure the mask is a vector of
         // integer.
         let mask_element_type = bx.type_ix(arg1_element_type.get_size() as u64 * 8);
-        let vector_mask_type = bx.context.new_vector_type(mask_element_type, arg1_vector_type.get_num_units() as u64);
+        let vector_mask_type =
+            bx.context.new_vector_type(mask_element_type, arg1_vector_type.get_num_units() as u64);
 
         let mut elements = vec![];
         let one = bx.context.new_rvalue_one(mask.get_type());
@@ -149,38 +153,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         // compare them as equal, so bitcast.
         // FIXME(antoyo): allow comparing vector types as equal in libgccjit.
         let arg2 = bx.context.new_bitcast(None, args[1].immediate(), arg1.get_type());
-        return Ok(compare_simd_types(
-            bx,
-            arg1,
-            arg2,
-            in_elem,
-            llret_ty,
-            cmp_op,
-        ));
+        return Ok(compare_simd_types(bx, arg1, arg2, in_elem, llret_ty, cmp_op));
     }
 
-    if let Some(stripped) = name.as_str().strip_prefix("simd_shuffle") {
-        let n: u64 = if stripped.is_empty() {
-            // Make sure this is actually an array, since typeck only checks the length-suffixed
-            // version of this intrinsic.
-            match args[2].layout.ty.kind() {
-                ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
-                    len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(
-                        || span_bug!(span, "could not evaluate shuffle index array length"),
-                    )
-                }
-                _ => return_error!(InvalidMonomorphization::SimdShuffle {
-                    span,
-                    name,
-                    ty: args[2].layout.ty
-                }),
+    if name == sym::simd_shuffle {
+        // Make sure this is actually an array, since typeck only checks the length-suffixed
+        // version of this intrinsic.
+        let n: u64 = match args[2].layout.ty.kind() {
+            ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
+                len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(
+                    || span_bug!(span, "could not evaluate shuffle index array length"),
+                )
             }
-        } else {
-            stripped.parse().unwrap_or_else(|_| {
-                span_bug!(span, "bad `simd_shuffle` instruction only caught in codegen?")
-            })
+            _ => return_error!(InvalidMonomorphization::SimdShuffle {
+                span,
+                name,
+                ty: args[2].layout.ty
+            }),
         };
-
         require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
 
         let (out_len, out_ty) = ret_ty.simd_size_and_type(bx.tcx());
@@ -202,7 +192,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     if name == sym::simd_insert {
         require!(
             in_elem == arg_tys[2],
-            InvalidMonomorphization::InsertedType { span, name, in_elem, in_ty, out_ty: arg_tys[2] }
+            InvalidMonomorphization::InsertedType {
+                span,
+                name,
+                in_elem,
+                in_ty,
+                out_ty: arg_tys[2]
+            }
         );
         let vector = args[0].immediate();
         let index = args[1].immediate();
@@ -228,7 +224,10 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     if name == sym::simd_select {
         let m_elem_ty = in_elem;
         let m_len = in_len;
-        require_simd!(arg_tys[1], InvalidMonomorphization::SimdArgument { span, name, ty: arg_tys[1] });
+        require_simd!(
+            arg_tys[1],
+            InvalidMonomorphization::SimdArgument { span, name, ty: arg_tys[1] }
+        );
         let (v_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
         require!(
             m_len == v_len,
@@ -241,7 +240,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
     }
 
-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
     if name == sym::simd_cast || name == sym::simd_as {
         require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
         let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
@@ -267,19 +266,17 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             Unsupported,
         }
 
-        let in_style =
-            match in_elem.kind() {
-                ty::Int(_) | ty::Uint(_) => Style::Int,
-                ty::Float(_) => Style::Float,
-                 _ => Style::Unsupported,
-            };
+        let in_style = match in_elem.kind() {
+            ty::Int(_) | ty::Uint(_) => Style::Int,
+            ty::Float(_) => Style::Float,
+            _ => Style::Unsupported,
+        };
 
-        let out_style =
-            match out_elem.kind() {
-                ty::Int(_) | ty::Uint(_) => Style::Int,
-                ty::Float(_) => Style::Float,
-                 _ => Style::Unsupported,
-            };
+        let out_style = match out_elem.kind() {
+            ty::Int(_) | ty::Uint(_) => Style::Int,
+            ty::Float(_) => Style::Float,
+            _ => Style::Unsupported,
+        };
 
         match (in_style, out_style) {
             (Style::Unsupported, Style::Unsupported) => {
@@ -294,7 +291,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                         out_elem
                     }
                 );
-            },
+            }
             _ => return Ok(bx.context.convert_vector(None, args[0].immediate(), llret_ty)),
         }
     }
@@ -342,10 +339,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
         let mut shift = 0;
         for i in 0..in_len {
-            let elem = bx.extract_element(vector, bx.context.new_rvalue_from_int(bx.int_type, i as i32));
+            let elem =
+                bx.extract_element(vector, bx.context.new_rvalue_from_int(bx.int_type, i as i32));
             let shifted = elem >> sign_shift;
             let masked = shifted & one;
-            result = result | (bx.context.new_cast(None, masked, result_type) << bx.context.new_rvalue_from_int(result_type, shift));
+            result = result
+                | (bx.context.new_cast(None, masked, result_type)
+                    << bx.context.new_rvalue_from_int(result_type, shift));
             shift += 1;
         }
 
@@ -394,46 +394,50 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 return Err(());
             }};
         }
-        let (elem_ty_str, elem_ty) =
-            if let ty::Float(f) = in_elem.kind() {
-                let elem_ty = bx.cx.type_float_from_ty(*f);
-                match f.bit_width() {
-                    32 => ("f", elem_ty),
-                    64 => ("", elem_ty),
-                    _ => {
-                        return_error!(InvalidMonomorphization::FloatingPointVector { span, name, f_ty: *f, in_ty });
-                    }
+        let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() {
+            let elem_ty = bx.cx.type_float_from_ty(*f);
+            match f.bit_width() {
+                32 => ("f", elem_ty),
+                64 => ("", elem_ty),
+                _ => {
+                    return_error!(InvalidMonomorphization::FloatingPointVector {
+                        span,
+                        name,
+                        f_ty: *f,
+                        in_ty
+                    });
                 }
             }
-            else {
-                return_error!(InvalidMonomorphization::FloatingPointType { span, name, in_ty });
-            };
+        } else {
+            return_error!(InvalidMonomorphization::FloatingPointType { span, name, in_ty });
+        };
 
         let vec_ty = bx.cx.type_vector(elem_ty, in_len);
 
-        let intr_name =
-            match name {
-                sym::simd_ceil => "ceil",
-                sym::simd_fabs => "fabs", // TODO(antoyo): pand with 170141183420855150465331762880109871103
-                sym::simd_fcos => "cos",
-                sym::simd_fexp2 => "exp2",
-                sym::simd_fexp => "exp",
-                sym::simd_flog10 => "log10",
-                sym::simd_flog2 => "log2",
-                sym::simd_flog => "log",
-                sym::simd_floor => "floor",
-                sym::simd_fma => "fma",
-                sym::simd_fpowi => "__builtin_powi",
-                sym::simd_fpow => "pow",
-                sym::simd_fsin => "sin",
-                sym::simd_fsqrt => "sqrt",
-                sym::simd_round => "round",
-                sym::simd_trunc => "trunc",
-                _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name })
-            };
+        let intr_name = match name {
+            sym::simd_ceil => "ceil",
+            sym::simd_fabs => "fabs", // TODO(antoyo): pand with 170141183420855150465331762880109871103
+            sym::simd_fcos => "cos",
+            sym::simd_fexp2 => "exp2",
+            sym::simd_fexp => "exp",
+            sym::simd_flog10 => "log10",
+            sym::simd_flog2 => "log2",
+            sym::simd_flog => "log",
+            sym::simd_floor => "floor",
+            sym::simd_fma => "fma",
+            sym::simd_fpowi => "__builtin_powi",
+            sym::simd_fpow => "pow",
+            sym::simd_fsin => "sin",
+            sym::simd_fsqrt => "sqrt",
+            sym::simd_round => "round",
+            sym::simd_trunc => "trunc",
+            _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
+        };
         let builtin_name = format!("{}{}", intr_name, elem_ty_str);
         let funcs = bx.cx.functions.borrow();
-        let function = funcs.get(&builtin_name).unwrap_or_else(|| panic!("unable to find builtin function {}", builtin_name));
+        let function = funcs
+            .get(&builtin_name)
+            .unwrap_or_else(|| panic!("unable to find builtin function {}", builtin_name));
 
         // TODO(antoyo): add platform-specific behavior here for architectures that have these
         // intrinsics as instructions (for instance, gpus)
@@ -479,8 +483,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
     }
 
-    #[cfg(feature="master")]
-    fn vector_ty<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, elem_ty: Ty<'tcx>, vec_len: u64) -> Type<'gcc> {
+    #[cfg(feature = "master")]
+    fn vector_ty<'gcc, 'tcx>(
+        cx: &CodegenCx<'gcc, 'tcx>,
+        elem_ty: Ty<'tcx>,
+        vec_len: u64,
+    ) -> Type<'gcc> {
         // FIXME: use cx.layout_of(ty).llvm_type() ?
         let elem_ty = match *elem_ty.kind() {
             ty::Int(v) => cx.type_int_from_ty(v),
@@ -491,15 +499,22 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         cx.type_vector(elem_ty, vec_len)
     }
 
-    #[cfg(feature="master")]
-    fn gather<'a, 'gcc, 'tcx>(default: RValue<'gcc>, pointers: RValue<'gcc>, mask: RValue<'gcc>, pointer_count: usize, bx: &mut Builder<'a, 'gcc, 'tcx>, in_len: u64, underlying_ty: Ty<'tcx>, invert: bool) -> RValue<'gcc> {
-        let vector_type =
-            if pointer_count > 1 {
-                bx.context.new_vector_type(bx.usize_type, in_len)
-            }
-            else {
-                vector_ty(bx, underlying_ty, in_len)
-            };
+    #[cfg(feature = "master")]
+    fn gather<'a, 'gcc, 'tcx>(
+        default: RValue<'gcc>,
+        pointers: RValue<'gcc>,
+        mask: RValue<'gcc>,
+        pointer_count: usize,
+        bx: &mut Builder<'a, 'gcc, 'tcx>,
+        in_len: u64,
+        underlying_ty: Ty<'tcx>,
+        invert: bool,
+    ) -> RValue<'gcc> {
+        let vector_type = if pointer_count > 1 {
+            bx.context.new_vector_type(bx.usize_type, in_len)
+        } else {
+            vector_ty(bx, underlying_ty, in_len)
+        };
         let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();
 
         let mut values = vec![];
@@ -530,13 +545,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
         if invert {
             bx.shuffle_vector(vector, default, mask)
-        }
-        else {
+        } else {
             bx.shuffle_vector(default, vector, mask)
         }
     }
 
-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
     if name == sym::simd_gather {
         // simd_gather(values: <N x T>, pointers: <N x *_ T>,
         //             mask: <N x i{M}>) -> <N x T>
@@ -546,8 +560,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
         // All types must be simd vector types
         require_simd!(in_ty, InvalidMonomorphization::SimdFirst { span, name, ty: in_ty });
-        require_simd!(arg_tys[1], InvalidMonomorphization::SimdSecond { span, name, ty: arg_tys[1] });
-        require_simd!(arg_tys[2], InvalidMonomorphization::SimdThird { span, name, ty: arg_tys[2] });
+        require_simd!(
+            arg_tys[1],
+            InvalidMonomorphization::SimdSecond { span, name, ty: arg_tys[1] }
+        );
+        require_simd!(
+            arg_tys[2],
+            InvalidMonomorphization::SimdThird { span, name, ty: arg_tys[2] }
+        );
         require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
 
         // Of the same length:
@@ -641,10 +661,19 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             }
         }
 
-        return Ok(gather(args[0].immediate(), args[1].immediate(), args[2].immediate(), pointer_count, bx, in_len, underlying_ty, false));
+        return Ok(gather(
+            args[0].immediate(),
+            args[1].immediate(),
+            args[2].immediate(),
+            pointer_count,
+            bx,
+            in_len,
+            underlying_ty,
+            false,
+        ));
     }
 
-    #[cfg(feature="master")]
+    #[cfg(feature = "master")]
     if name == sym::simd_scatter {
         // simd_scatter(values: <N x T>, pointers: <N x *mut T>,
         //             mask: <N x i{M}>) -> ()
@@ -654,8 +683,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
         // All types must be simd vector types
         require_simd!(in_ty, InvalidMonomorphization::SimdFirst { span, name, ty: in_ty });
-        require_simd!(arg_tys[1], InvalidMonomorphization::SimdSecond { span, name, ty: arg_tys[1] });
-        require_simd!(arg_tys[2], InvalidMonomorphization::SimdThird { span, name, ty: arg_tys[2] });
+        require_simd!(
+            arg_tys[1],
+            InvalidMonomorphization::SimdSecond { span, name, ty: arg_tys[1] }
+        );
+        require_simd!(
+            arg_tys[2],
+            InvalidMonomorphization::SimdThird { span, name, ty: arg_tys[2] }
+        );
 
         // Of the same length:
         let (element_len1, _) = arg_tys[1].simd_size_and_type(bx.tcx());
@@ -744,17 +779,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             }
         }
 
-        let result = gather(args[0].immediate(), args[1].immediate(), args[2].immediate(), pointer_count, bx, in_len, underlying_ty, true);
+        let result = gather(
+            args[0].immediate(),
+            args[1].immediate(),
+            args[2].immediate(),
+            pointer_count,
+            bx,
+            in_len,
+            underlying_ty,
+            true,
+        );
 
         let pointers = args[1].immediate();
 
-        let vector_type =
-            if pointer_count > 1 {
-                bx.context.new_vector_type(bx.usize_type, in_len)
-            }
-            else {
-                vector_ty(bx, underlying_ty, in_len)
-            };
+        let vector_type = if pointer_count > 1 {
+            bx.context.new_vector_type(bx.usize_type, in_len)
+        } else {
+            vector_ty(bx, underlying_ty, in_len)
+        };
         let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();
 
         for i in 0..in_len {
@@ -809,11 +851,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         let rhs = args[1].immediate();
         let is_add = name == sym::simd_saturating_add;
         let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _;
-        let (signed, elem_width, elem_ty) =
-            match *in_elem.kind() {
-                ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_int_from_ty(i)),
-                ty::Uint(i) => (false, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_uint_from_ty(i)),
-                _ => {
+        let (signed, elem_width, elem_ty) = match *in_elem.kind() {
+            ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_int_from_ty(i)),
+            ty::Uint(i) => {
+                (false, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_uint_from_ty(i))
+            }
+            _ => {
                 return_error!(InvalidMonomorphization::ExpectedVectorElementType {
                     span,
                     name,
@@ -823,77 +866,82 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             }
         };
 
-        let result =
-            match (signed, is_add) {
-                (false, true) => {
-                    let res = lhs + rhs;
-                    let cmp = bx.context.new_comparison(None, ComparisonOp::LessThan, res, lhs);
-                    res | cmp
-                },
-                (true, true) => {
-                    // Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition
-                    // TODO(antoyo): improve using conditional operators if possible.
-                    // TODO(antoyo): dyncast_vector should not require a call to unqualified.
-                    let arg_type = lhs.get_type().unqualified();
-                    // TODO(antoyo): convert lhs and rhs to unsigned.
-                    let sum = lhs + rhs;
-                    let vector_type = arg_type.dyncast_vector().expect("vector type");
-                    let unit = vector_type.get_num_units();
-                    let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
-                    let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
-
-                    let xor1 = lhs ^ rhs;
-                    let xor2 = lhs ^ sum;
-                    let and = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
-                    let mask = and >> width;
-
-                    let one = bx.context.new_rvalue_one(elem_ty);
-                    let ones = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
-                    let shift1 = ones << width;
-                    let shift2 = sum >> width;
-                    let mask_min = shift1 ^ shift2;
-
-                    let and1 = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
-                    let and2 = mask & mask_min;
-
-                    and1 + and2
-                },
-                (false, false) => {
-                    let res = lhs - rhs;
-                    let cmp = bx.context.new_comparison(None, ComparisonOp::LessThanEquals, res, lhs);
-                    res & cmp
-                },
-                (true, false) => {
-                    // TODO(antoyo): dyncast_vector should not require a call to unqualified.
-                    let arg_type = lhs.get_type().unqualified();
-                    // TODO(antoyo): this uses the same algorithm from saturating add, but add the
-                    // negative of the right operand. Find a proper subtraction algorithm.
-                    let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);
-
-                    // TODO(antoyo): convert lhs and rhs to unsigned.
-                    let sum = lhs + rhs;
-                    let vector_type = arg_type.dyncast_vector().expect("vector type");
-                    let unit = vector_type.get_num_units();
-                    let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
-                    let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
-
-                    let xor1 = lhs ^ rhs;
-                    let xor2 = lhs ^ sum;
-                    let and = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
-                    let mask = and >> width;
-
-                    let one = bx.context.new_rvalue_one(elem_ty);
-                    let ones = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
-                    let shift1 = ones << width;
-                    let shift2 = sum >> width;
-                    let mask_min = shift1 ^ shift2;
-
-                    let and1 = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
-                    let and2 = mask & mask_min;
-
-                    and1 + and2
-                }
-            };
+        let result = match (signed, is_add) {
+            (false, true) => {
+                let res = lhs + rhs;
+                let cmp = bx.context.new_comparison(None, ComparisonOp::LessThan, res, lhs);
+                res | cmp
+            }
+            (true, true) => {
+                // Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition
+                // TODO(antoyo): improve using conditional operators if possible.
+                // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+                let arg_type = lhs.get_type().unqualified();
+                // TODO(antoyo): convert lhs and rhs to unsigned.
+                let sum = lhs + rhs;
+                let vector_type = arg_type.dyncast_vector().expect("vector type");
+                let unit = vector_type.get_num_units();
+                let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
+                let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
+
+                let xor1 = lhs ^ rhs;
+                let xor2 = lhs ^ sum;
+                let and =
+                    bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
+                let mask = and >> width;
+
+                let one = bx.context.new_rvalue_one(elem_ty);
+                let ones =
+                    bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
+                let shift1 = ones << width;
+                let shift2 = sum >> width;
+                let mask_min = shift1 ^ shift2;
+
+                let and1 =
+                    bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
+                let and2 = mask & mask_min;
+
+                and1 + and2
+            }
+            (false, false) => {
+                let res = lhs - rhs;
+                let cmp = bx.context.new_comparison(None, ComparisonOp::LessThanEquals, res, lhs);
+                res & cmp
+            }
+            (true, false) => {
+                // TODO(antoyo): dyncast_vector should not require a call to unqualified.
+                let arg_type = lhs.get_type().unqualified();
+                // TODO(antoyo): this uses the same algorithm from saturating add, but add the
+                // negative of the right operand. Find a proper subtraction algorithm.
+                let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);
+
+                // TODO(antoyo): convert lhs and rhs to unsigned.
+                let sum = lhs + rhs;
+                let vector_type = arg_type.dyncast_vector().expect("vector type");
+                let unit = vector_type.get_num_units();
+                let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
+                let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
+
+                let xor1 = lhs ^ rhs;
+                let xor2 = lhs ^ sum;
+                let and =
+                    bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
+                let mask = and >> width;
+
+                let one = bx.context.new_rvalue_one(elem_ty);
+                let ones =
+                    bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
+                let shift1 = ones << width;
+                let shift2 = sum >> width;
+                let mask_min = shift1 ^ shift2;
+
+                let and1 =
+                    bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
+                let and2 = mask & mask_min;
+
+                and1 + and2
+            }
+        };
 
         return Ok(result);
     }
@@ -968,7 +1016,6 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         1.0
     );
 
-
     macro_rules! minmax_red {
         ($name:ident: $int_red:ident, $float_red:ident) => {
             if name == sym::$name {
@@ -979,13 +1026,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => Ok(bx.$int_red(args[0].immediate())),
                     ty::Float(_) => Ok(bx.$float_red(args[0].immediate())),
-                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol { 
-                        span, 
+                    _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
+                        span,
                         name,
                         symbol: sym::$name,
                         in_ty,
-                        in_elem, 
-                        ret_ty 
+                        in_elem,
+                        ret_ty
                     }),
                 };
             }
@@ -1025,7 +1072,15 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
                         let r = bx.vector_reduce_op(input, $op);
-                        Ok(if !$boolean { r } else { bx.icmp(IntPredicate::IntNE, r, bx.context.new_rvalue_zero(r.get_type())) })
+                        Ok(if !$boolean {
+                            r
+                        } else {
+                            bx.icmp(
+                                IntPredicate::IntNE,
+                                r,
+                                bx.context.new_rvalue_zero(r.get_type()),
+                            )
+                        })
                     }
                     _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                         span,