Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
-rw-r--r--   compiler/rustc_codegen_llvm/src/builder.rs   | 115
-rw-r--r--   compiler/rustc_codegen_llvm/src/context.rs   |  30
-rw-r--r--   compiler/rustc_codegen_llvm/src/intrinsic.rs |  40
-rw-r--r--   compiler/rustc_codegen_llvm/src/llvm/ffi.rs  |   2
4 files changed, 85 insertions, 102 deletions
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 896e56a9a1e..053cda1e7cc 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -2,6 +2,7 @@ use crate::common::Funclet;
 use crate::context::CodegenCx;
 use crate::llvm::{self, BasicBlock, False};
 use crate::llvm::{AtomicOrdering, AtomicRmwBinOp, SynchronizationScope};
+use crate::llvm_util;
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
@@ -16,7 +17,7 @@ use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_hir::def_id::DefId;
 use rustc_middle::ty::layout::TyAndLayout;
 use rustc_middle::ty::{self, Ty, TyCtxt};
-use rustc_span::{sym, Span};
+use rustc_span::Span;
 use rustc_target::abi::{self, Align, Size};
 use rustc_target::spec::{HasTargetSpec, Target};
 use std::borrow::Cow;
@@ -261,7 +262,7 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     fn fadd_fast(&mut self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMBuildFAdd(self.llbuilder, lhs, rhs, UNNAMED);
-            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            llvm::LLVMRustSetFastMath(instr);
             instr
         }
     }
@@ -269,7 +270,7 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     fn fsub_fast(&mut self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMBuildFSub(self.llbuilder, lhs, rhs, UNNAMED);
-            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            llvm::LLVMRustSetFastMath(instr);
             instr
         }
     }
@@ -277,7 +278,7 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     fn fmul_fast(&mut self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMBuildFMul(self.llbuilder, lhs, rhs, UNNAMED);
-            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            llvm::LLVMRustSetFastMath(instr);
             instr
         }
     }
@@ -285,7 +286,7 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     fn fdiv_fast(&mut self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMBuildFDiv(self.llbuilder, lhs, rhs, UNNAMED);
-            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            llvm::LLVMRustSetFastMath(instr);
             instr
         }
     }
@@ -293,7 +294,7 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     fn frem_fast(&mut self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMBuildFRem(self.llbuilder, lhs, rhs, UNNAMED);
-            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            llvm::LLVMRustSetFastMath(instr);
             instr
         }
     }
@@ -669,81 +670,47 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
-        // WebAssembly has saturating floating point to integer casts if the
-        // `nontrapping-fptoint` target feature is activated. We'll use those if
-        // they are available.
-        if self.sess().target.arch == "wasm32"
-            && self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
-        {
+        if llvm_util::get_version() >= (12, 0, 0) && !self.fptoint_sat_broken_in_llvm() {
             let src_ty = self.cx.val_ty(val);
             let float_width = self.cx.float_width(src_ty);
             let int_width = self.cx.int_width(dest_ty);
-            let name = match (int_width, float_width) {
-                (32, 32) => Some("llvm.wasm.trunc.saturate.unsigned.i32.f32"),
-                (32, 64) => Some("llvm.wasm.trunc.saturate.unsigned.i32.f64"),
-                (64, 32) => Some("llvm.wasm.trunc.saturate.unsigned.i64.f32"),
-                (64, 64) => Some("llvm.wasm.trunc.saturate.unsigned.i64.f64"),
-                _ => None,
-            };
-            if let Some(name) = name {
-                let intrinsic = self.get_intrinsic(name);
-                return Some(self.call(intrinsic, &[val], None));
-            }
+            let name = format!("llvm.fptoui.sat.i{}.f{}", int_width, float_width);
+            let intrinsic = self.get_intrinsic(&name);
+            return Some(self.call(intrinsic, &[val], None));
         }
+
         None
     }
 
     fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
-        // WebAssembly has saturating floating point to integer casts if the
-        // `nontrapping-fptoint` target feature is activated. We'll use those if
-        // they are available.
-        if self.sess().target.arch == "wasm32"
-            && self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
-        {
+        if llvm_util::get_version() >= (12, 0, 0) && !self.fptoint_sat_broken_in_llvm() {
             let src_ty = self.cx.val_ty(val);
             let float_width = self.cx.float_width(src_ty);
             let int_width = self.cx.int_width(dest_ty);
-            let name = match (int_width, float_width) {
-                (32, 32) => Some("llvm.wasm.trunc.saturate.signed.i32.f32"),
-                (32, 64) => Some("llvm.wasm.trunc.saturate.signed.i32.f64"),
-                (64, 32) => Some("llvm.wasm.trunc.saturate.signed.i64.f32"),
-                (64, 64) => Some("llvm.wasm.trunc.saturate.signed.i64.f64"),
-                _ => None,
-            };
-            if let Some(name) = name {
-                let intrinsic = self.get_intrinsic(name);
-                return Some(self.call(intrinsic, &[val], None));
-            }
+            let name = format!("llvm.fptosi.sat.i{}.f{}", int_width, float_width);
+            let intrinsic = self.get_intrinsic(&name);
+            return Some(self.call(intrinsic, &[val], None));
         }
-        None
-    }
-
-    fn fptosui_may_trap(&self, val: &'ll Value, dest_ty: &'ll Type) -> bool {
-        // Most of the time we'll be generating the `fptosi` or `fptoui`
-        // instruction for floating-point-to-integer conversions. These
-        // instructions by definition in LLVM do not trap. For the WebAssembly
-        // target, however, we'll lower in some cases to intrinsic calls instead
-        // which may trap. If we detect that this is a situation where we'll be
-        // using the intrinsics then we report that the call map trap, which
-        // callers might need to handle.
-        if !self.wasm_and_missing_nontrapping_fptoint() {
-            return false;
-        }
-        let src_ty = self.cx.val_ty(val);
-        let float_width = self.cx.float_width(src_ty);
-        let int_width = self.cx.int_width(dest_ty);
-        matches!((int_width, float_width), (32, 32) | (32, 64) | (64, 32) | (64, 64))
+        None
     }
 
     fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        // When we can, use the native wasm intrinsics which have tighter
-        // codegen. Note that this has a semantic difference in that the
-        // intrinsic can trap whereas `fptoui` never traps. That difference,
-        // however, is handled by `fptosui_may_trap` above.
+        // On WebAssembly the `fptoui` and `fptosi` instructions currently have
+        // poor codegen. The reason for this is that the corresponding wasm
+        // instructions, `i32.trunc_f32_s` for example, will trap when the float
+        // is out-of-bounds, infinity, or nan. This means that LLVM
+        // automatically inserts control flow around `fptoui` and `fptosi`
+        // because the LLVM instruction `fptoui` is defined as producing a
+        // poison value, not having UB on out-of-bounds values.
         //
-        // Note that we skip the wasm intrinsics for vector types where `fptoui`
-        // must be used instead.
-        if self.wasm_and_missing_nontrapping_fptoint() {
+        // This method, however, is only used with non-saturating casts that
+        // have UB on out-of-bounds values. This means that it's ok if we use
+        // the raw wasm instruction since out-of-bounds values can do whatever
+        // we like. To ensure that LLVM picks the right instruction we choose
+        // the raw wasm intrinsic functions which avoid LLVM inserting all the
+        // other control flow automatically.
+        if self.sess().target.arch == "wasm32" {
             let src_ty = self.cx.val_ty(val);
             if self.cx.type_kind(src_ty) != TypeKind::Vector {
                 let float_width = self.cx.float_width(src_ty);
@@ -765,7 +732,8 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn fptosi(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        if self.wasm_and_missing_nontrapping_fptoint() {
+        // see `fptoui` above for why wasm is different here
+        if self.sess().target.arch == "wasm32" {
             let src_ty = self.cx.val_ty(val);
             if self.cx.type_kind(src_ty) != TypeKind::Vector {
                 let float_width = self.cx.float_width(src_ty);
@@ -1242,14 +1210,14 @@ impl Builder<'a, 'll, 'tcx> {
     pub fn vector_reduce_fadd_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
-            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            llvm::LLVMRustSetFastMath(instr);
             instr
         }
     }
     pub fn vector_reduce_fmul_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
         unsafe {
             let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
-            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            llvm::LLVMRustSetFastMath(instr);
             instr
         }
     }
@@ -1282,7 +1250,7 @@ impl Builder<'a, 'll, 'tcx> {
         unsafe {
             let instr =
                 llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, /*NoNaNs:*/ true);
-            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            llvm::LLVMRustSetFastMath(instr);
             instr
         }
     }
@@ -1290,7 +1258,7 @@ impl Builder<'a, 'll, 'tcx> {
         unsafe {
             let instr =
                 llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, /*NoNaNs:*/ true);
-            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            llvm::LLVMRustSetFastMath(instr);
             instr
         }
     }
@@ -1420,8 +1388,11 @@ impl Builder<'a, 'll, 'tcx> {
         }
     }
 
-    fn wasm_and_missing_nontrapping_fptoint(&self) -> bool {
-        self.sess().target.arch == "wasm32"
-            && !self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
+    fn fptoint_sat_broken_in_llvm(&self) -> bool {
+        match self.tcx.sess.target.arch.as_str() {
+            // FIXME - https://bugs.llvm.org/show_bug.cgi?id=50083
+            "riscv64" => llvm_util::get_version() < (13, 0, 0),
+            _ => false,
+        }
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 773c0c16328..f5c54b11c08 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -503,14 +503,6 @@ impl CodegenCx<'b, 'tcx> {
         let t_f32 = self.type_f32();
         let t_f64 = self.type_f64();
 
-        ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.wasm.trunc.saturate.unsigned.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.wasm.trunc.saturate.unsigned.i64.f64", fn(t_f64) -> t_i64);
-        ifn!("llvm.wasm.trunc.saturate.signed.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.wasm.trunc.saturate.signed.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.wasm.trunc.saturate.signed.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.wasm.trunc.saturate.signed.i64.f64", fn(t_f64) -> t_i64);
         ifn!("llvm.wasm.trunc.unsigned.i32.f32", fn(t_f32) -> t_i32);
         ifn!("llvm.wasm.trunc.unsigned.i32.f64", fn(t_f64) -> t_i32);
         ifn!("llvm.wasm.trunc.unsigned.i64.f32", fn(t_f32) -> t_i64);
@@ -520,6 +512,28 @@ impl CodegenCx<'b, 'tcx> {
         ifn!("llvm.wasm.trunc.signed.i64.f32", fn(t_f32) -> t_i64);
         ifn!("llvm.wasm.trunc.signed.i64.f64", fn(t_f64) -> t_i64);
 
+        ifn!("llvm.fptosi.sat.i8.f32", fn(t_f32) -> t_i8);
+        ifn!("llvm.fptosi.sat.i16.f32", fn(t_f32) -> t_i16);
+        ifn!("llvm.fptosi.sat.i32.f32", fn(t_f32) -> t_i32);
+        ifn!("llvm.fptosi.sat.i64.f32", fn(t_f32) -> t_i64);
+        ifn!("llvm.fptosi.sat.i128.f32", fn(t_f32) -> t_i128);
+        ifn!("llvm.fptosi.sat.i8.f64", fn(t_f64) -> t_i8);
+        ifn!("llvm.fptosi.sat.i16.f64", fn(t_f64) -> t_i16);
+        ifn!("llvm.fptosi.sat.i32.f64", fn(t_f64) -> t_i32);
+        ifn!("llvm.fptosi.sat.i64.f64", fn(t_f64) -> t_i64);
+        ifn!("llvm.fptosi.sat.i128.f64", fn(t_f64) -> t_i128);
+
+        ifn!("llvm.fptoui.sat.i8.f32", fn(t_f32) -> t_i8);
+        ifn!("llvm.fptoui.sat.i16.f32", fn(t_f32) -> t_i16);
+        ifn!("llvm.fptoui.sat.i32.f32", fn(t_f32) -> t_i32);
+        ifn!("llvm.fptoui.sat.i64.f32", fn(t_f32) -> t_i64);
+        ifn!("llvm.fptoui.sat.i128.f32", fn(t_f32) -> t_i128);
+        ifn!("llvm.fptoui.sat.i8.f64", fn(t_f64) -> t_i8);
+        ifn!("llvm.fptoui.sat.i16.f64", fn(t_f64) -> t_i16);
+        ifn!("llvm.fptoui.sat.i32.f64", fn(t_f64) -> t_i32);
+        ifn!("llvm.fptoui.sat.i64.f64", fn(t_f64) -> t_i64);
+        ifn!("llvm.fptoui.sat.i128.f64", fn(t_f64) -> t_i128);
+
         ifn!("llvm.trap", fn() -> void);
         ifn!("llvm.debugtrap", fn() -> void);
         ifn!("llvm.frameaddress", fn(t_i32) -> i8p);
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 52fab2ea17d..fc6c1abf4af 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1053,50 +1053,48 @@ fn generic_simd_intrinsic(
         let vec_ty = bx.type_vector(elem_ty, in_len);
 
         let (intr_name, fn_ty) = match name {
-            sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fcos => ("cos", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fabs => ("fabs", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_ceil => ("ceil", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_trunc => ("trunc", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fexp => ("exp", bx.type_func(&[vec_ty], vec_ty)),
+            sym::simd_fabs => ("fabs", bx.type_func(&[vec_ty], vec_ty)),
+            sym::simd_fcos => ("cos", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_fexp2 => ("exp2", bx.type_func(&[vec_ty], vec_ty)),
+            sym::simd_fexp => ("exp", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_flog10 => ("log10", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_flog2 => ("log2", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
+            sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
+            sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
             sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
             sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
-            sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
+            sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
+            sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
+            sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)),
+            sym::simd_trunc => ("trunc", bx.type_func(&[vec_ty], vec_ty)),
             _ => return_error!("unrecognized intrinsic `{}`", name),
         };
-
         let llvm_name = &format!("llvm.{0}.v{1}{2}", intr_name, in_len, elem_ty_str);
         let f = bx.declare_cfn(&llvm_name, llvm::UnnamedAddr::No, fn_ty);
         let c = bx.call(f, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None);
-        unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(c) };
         Ok(c)
     }
 
     if std::matches!(
         name,
-        sym::simd_fsqrt
-            | sym::simd_fsin
-            | sym::simd_fcos
+        sym::simd_ceil
             | sym::simd_fabs
-            | sym::simd_ceil
-            | sym::simd_floor
-            | sym::simd_round
-            | sym::simd_trunc
-            | sym::simd_fexp
+            | sym::simd_fcos
             | sym::simd_fexp2
+            | sym::simd_fexp
             | sym::simd_flog10
             | sym::simd_flog2
             | sym::simd_flog
-            | sym::simd_fpowi
-            | sym::simd_fpow
+            | sym::simd_floor
             | sym::simd_fma
+            | sym::simd_fpow
+            | sym::simd_fpowi
+            | sym::simd_fsin
+            | sym::simd_fsqrt
+            | sym::simd_round
+            | sym::simd_trunc
     ) {
         return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
     }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 54a8249b175..32b1526f6e4 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1354,7 +1354,7 @@ extern "C" {
     pub fn LLVMBuildNeg(B: &Builder<'a>, V: &'a Value, Name: *const c_char) -> &'a Value;
     pub fn LLVMBuildFNeg(B: &Builder<'a>, V: &'a Value, Name: *const c_char) -> &'a Value;
     pub fn LLVMBuildNot(B: &Builder<'a>, V: &'a Value, Name: *const c_char) -> &'a Value;
-    pub fn LLVMRustSetHasUnsafeAlgebra(Instr: &Value);
+    pub fn LLVMRustSetFastMath(Instr: &Value);
 
     // Memory
     pub fn LLVMBuildAlloca(B: &Builder<'a>, Ty: &'a Type, Name: *const c_char) -> &'a Value;
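Note on semantics: the `llvm.fptoui.sat.*` and `llvm.fptosi.sat.*` intrinsics declared above perform saturating conversions — NaN maps to zero and out-of-range inputs clamp to the destination type's bounds — which matches the behaviour Rust float-to-int `as` casts guarantee. A minimal Rust sketch of that behaviour follows (illustration only; the helper name `fptosi_sat_i32` is made up here and is not part of this change):

// Sketch of the saturating float-to-int semantics provided by an
// intrinsic such as `llvm.fptosi.sat.i32.f32`: NaN -> 0, out-of-range
// values clamp to i32::MIN/i32::MAX, in-range values truncate toward zero.
fn fptosi_sat_i32(x: f32) -> i32 {
    if x.is_nan() {
        0
    } else if x >= i32::MAX as f32 {
        i32::MAX
    } else if x <= i32::MIN as f32 {
        i32::MIN
    } else {
        // In-range values truncate toward zero.
        x as i32
    }
}

fn main() {
    assert_eq!(fptosi_sat_i32(f32::NAN), 0);
    assert_eq!(fptosi_sat_i32(1e10), i32::MAX);
    assert_eq!(fptosi_sat_i32(-1e10), i32::MIN);
    assert_eq!(fptosi_sat_i32(-3.7), -3);
    // Rust `as` casts already saturate the same way, which is why the
    // generic LLVM intrinsics can replace the wasm-specific ones here.
    assert_eq!(1e10f32 as i32, i32::MAX);
}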
