about summary refs log tree commit diff
diff options
context:
space:
mode:
authorantoyo <antoyo@users.noreply.github.com>2022-10-08 16:05:48 -0400
committerGitHub <noreply@github.com>2022-10-08 16:05:48 -0400
commit44e3ea3e42918ff6a5d2083ddc0c4ece16309862 (patch)
treebdc36d3ddd02fc654b0af8388d38222005f937d9
parent235414efaeea5f2cb14eb417f2b5caddb6211168 (diff)
parent74dac5d970fa3f5c070d7d656622068cadfb0feb (diff)
downloadrust-44e3ea3e42918ff6a5d2083ddc0c4ece16309862.tar.gz
rust-44e3ea3e42918ff6a5d2083ddc0c4ece16309862.zip
Merge pull request #229 from rust-lang/fix/fmaddsub
Fix/fmaddsub
-rw-r--r--src/base.rs3
-rw-r--r--src/builder.rs6
-rw-r--r--src/context.rs3
-rw-r--r--src/intrinsic/llvm.rs26
4 files changed, 31 insertions, 7 deletions
diff --git a/src/base.rs b/src/base.rs
index b60382496c2..6e1ad9f5315 100644
--- a/src/base.rs
+++ b/src/base.rs
@@ -126,6 +126,9 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol, supports_
             context.add_command_line_option("-fdata-sections");
         }
 
+        if env::var("CG_GCCJIT_DUMP_RTL").as_deref() == Ok("1") {
+            context.add_command_line_option("-fdump-rtl-vregs");
+        }
         if env::var("CG_GCCJIT_DUMP_TREE_ALL").as_deref() == Ok("1") {
             context.add_command_line_option("-fdump-tree-all");
         }
diff --git a/src/builder.rs b/src/builder.rs
index f0582fdcef2..f3933a2d706 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -280,7 +280,11 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let func_name = format!("{:?}", func_ptr);
         let previous_arg_count = args.len();
         let orig_args = args;
-        let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args.into(), &func_name);
+        let args = {
+            let function_address_names = self.function_address_names.borrow();
+            let original_function_name = function_address_names.get(&func_ptr);
+            llvm::adjust_intrinsic_arguments(&self, gcc_func, args.into(), &func_name, original_function_name)
+        };
         let args_adjusted = args.len() != previous_arg_count;
         let args = self.check_ptr_call("call", func_ptr, &*args);
 
diff --git a/src/context.rs b/src/context.rs
index 2699559dc2a..5f34ddd92ba 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -33,6 +33,7 @@ pub struct CodegenCx<'gcc, 'tcx> {
     // TODO(bjorn3): Can this field be removed?
     pub current_func: RefCell<Option<Function<'gcc>>>,
     pub normal_function_addresses: RefCell<FxHashSet<RValue<'gcc>>>,
+    pub function_address_names: RefCell<FxHashMap<RValue<'gcc>, String>>,
 
     pub functions: RefCell<FxHashMap<String, Function<'gcc>>>,
     pub intrinsics: RefCell<FxHashMap<String, Function<'gcc>>>,
@@ -192,6 +193,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             context,
             current_func: RefCell::new(None),
             normal_function_addresses: Default::default(),
+            function_address_names: Default::default(),
             functions: RefCell::new(functions),
             intrinsics: RefCell::new(FxHashMap::default()),
 
@@ -345,6 +347,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         // FIXME(antoyo): the rustc API seems to call get_fn_addr() when not needed (e.g. for FFI).
 
         self.normal_function_addresses.borrow_mut().insert(ptr);
+        self.function_address_names.borrow_mut().insert(ptr, func_name.to_string());
 
         ptr
     }
diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs
index 5d10119e85e..621ef328a8c 100644
--- a/src/intrinsic/llvm.rs
+++ b/src/intrinsic/llvm.rs
@@ -5,7 +5,7 @@ use rustc_codegen_ssa::traits::BuilderMethods;
 
 use crate::{context::CodegenCx, builder::Builder};
 
-pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, gcc_func: FunctionPtrType<'gcc>, mut args: Cow<'b, [RValue<'gcc>]>, func_name: &str) -> Cow<'b, [RValue<'gcc>]> {
+pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, gcc_func: FunctionPtrType<'gcc>, mut args: Cow<'b, [RValue<'gcc>]>, func_name: &str, original_function_name: Option<&String>) -> Cow<'b, [RValue<'gcc>]> {
     // Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
     // arguments here.
     if gcc_func.get_param_count() != args.len() {
@@ -277,11 +277,23 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
                 let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 2]);
                 args = vec![a, b, c, new_args[3]].into();
             },
-            "__builtin_ia32_vfmaddsubpd256" | "__builtin_ia32_vfmaddsubps" | "__builtin_ia32_vfmaddsubps256" => {
-                let mut new_args = args.to_vec();
-                let arg3 = &mut new_args[2];
-                *arg3 = builder.context.new_unary_op(None, UnaryOp::Minus, arg3.get_type(), *arg3);
-                args = new_args.into();
+            "__builtin_ia32_vfmaddsubpd256" | "__builtin_ia32_vfmaddsubps" | "__builtin_ia32_vfmaddsubps256"
+                | "__builtin_ia32_vfmaddsubpd" => {
+                if let Some(original_function_name) = original_function_name {
+                    match &**original_function_name {
+                        "llvm.x86.fma.vfmsubadd.pd.256" | "llvm.x86.fma.vfmsubadd.ps" | "llvm.x86.fma.vfmsubadd.ps.256"
+                            | "llvm.x86.fma.vfmsubadd.pd" => {
+                            // NOTE: since both llvm.x86.fma.vfmsubadd.ps and llvm.x86.fma.vfmaddsub.ps maps to
+                            // __builtin_ia32_vfmaddsubps, only add minus if this comes from a
+                            // subadd LLVM intrinsic, e.g. _mm256_fmsubadd_pd.
+                            let mut new_args = args.to_vec();
+                            let arg3 = &mut new_args[2];
+                            *arg3 = builder.context.new_unary_op(None, UnaryOp::Minus, arg3.get_type(), *arg3);
+                            args = new_args.into();
+                        },
+                        _ => (),
+                    }
+                }
             },
             "__builtin_ia32_ldmxcsr" => {
                 // The builtin __builtin_ia32_ldmxcsr takes an integer value while llvm.x86.sse.ldmxcsr takes a pointer,
@@ -583,6 +595,8 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
         "llvm.fshr.v32i16" => "__builtin_ia32_vpshrdv_v32hi",
         "llvm.fshr.v16i16" => "__builtin_ia32_vpshrdv_v16hi",
         "llvm.fshr.v8i16" => "__builtin_ia32_vpshrdv_v8hi",
+        "llvm.x86.fma.vfmadd.sd" => "__builtin_ia32_vfmaddsd3",
+        "llvm.x86.fma.vfmadd.ss" => "__builtin_ia32_vfmaddss3",
 
         // The above doc points to unknown builtins for the following, so override them:
         "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",