about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2016-03-07 04:25:27 +0000
committerbors <bors@rust-lang.org>2016-03-07 04:25:27 +0000
commit6d262db4482e13ec05eb113e57e26d249698b4cf (patch)
tree6a93f96508825ad54693a01250ec607c6f3e20e6
parent6eb81a1f3bc27f2160048c09e3936486b4c9ef59 (diff)
parenta409076df4ac1e80d0e8b4ed55608cbd354129ef (diff)
downloadrust-6d262db4482e13ec05eb113e57e26d249698b4cf.tar.gz
rust-6d262db4482e13ec05eb113e57e26d249698b4cf.zip
Auto merge of #32066 - ruud-v-a:fma, r=alexcrichton
This adds support for fused multiply-add and multiply-subtract vector intrinsics for 128 and 256-bit vectors of `f32` and `f64`. These correspond to the intrinsics [listed here](https://software.intel.com/en-us/node/523929) except for the `_ss` and `_sd` variants. The intrinsics added are:

 * `fmadd`
 * `fmaddsub`
 * `fmsub`
 * `fmsubadd`
 * `fnmadd`
 * `fnmsub`

The “fma” target feature must be enabled by passing `-C target-feature=+fma` to rustc when using these, otherwise LLVM will complain.

I verified locally that the `x86_mm256_fmadd_ps` and `x86_mm256_fmsub_ps` work.
-rw-r--r--src/etc/platform-intrinsics/generator.py6
-rw-r--r--src/etc/platform-intrinsics/x86/fma.json47
-rw-r--r--src/librustc_platform_intrinsics/x86.rs120
3 files changed, 170 insertions, 3 deletions
diff --git a/src/etc/platform-intrinsics/generator.py b/src/etc/platform-intrinsics/generator.py
index e3aa4e688d3..0e0d4841063 100644
--- a/src/etc/platform-intrinsics/generator.py
+++ b/src/etc/platform-intrinsics/generator.py
@@ -691,7 +691,7 @@ def parse_args():
     parser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout,
                         help = 'File to output to (default stdout).')
     parser.add_argument('-i', '--info', type=argparse.FileType('r'),
-                        help = 'File containing platform specific information to merge into'
+                        help = 'File containing platform specific information to merge into '
                                 'the input files\' header.')
     parser.add_argument('in_', metavar="FILE", type=argparse.FileType('r'), nargs='+',
                         help = 'JSON files to load')
@@ -735,12 +735,12 @@ class CompilerDefs(object):
 
 use {{Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}};
 use IntrinsicDef::Named;
-use rustc::middle::ty;
+use rustc::middle::ty::TyCtxt;
 
 // The default inlining settings trigger a pathological behaviour in
 // LLVM, which causes makes compilation very slow. See #28273.
 #[inline(never)]
-pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {{
+pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {{
     if !name.starts_with("{0}") {{ return None }}
     Some(match &name["{0}".len()..] {{'''.format(platform.intrinsic_prefix())
 
diff --git a/src/etc/platform-intrinsics/x86/fma.json b/src/etc/platform-intrinsics/x86/fma.json
new file mode 100644
index 00000000000..c922d166c8f
--- /dev/null
+++ b/src/etc/platform-intrinsics/x86/fma.json
@@ -0,0 +1,47 @@
+{
+    "llvm_prefix": "llvm.x86.fma.",
+    "intrinsics": [
+        {
+            "intrinsic": "{0.width_mm}_fmadd_{0.data_type}",
+            "width": [128, 256],
+            "llvm": "vfmadd.{0.data_type_short}{0.width_suffix}",
+            "ret": "f(32-64)",
+            "args": ["0", "0", "0"]
+        },
+        {
+            "intrinsic": "{0.width_mm}_fmaddsub_{0.data_type}",
+            "width": [128, 256],
+            "llvm": "vfmaddsub.{0.data_type_short}{0.width_suffix}",
+            "ret": "f(32-64)",
+            "args": ["0", "0", "0"]
+        },
+        {
+            "intrinsic": "{0.width_mm}_fmsub_{0.data_type}",
+            "width": [128, 256],
+            "llvm": "vfmsub.{0.data_type_short}{0.width_suffix}",
+            "ret": "f(32-64)",
+            "args": ["0", "0", "0"]
+        },
+        {
+            "intrinsic": "{0.width_mm}_fmsubadd_{0.data_type}",
+            "width": [128, 256],
+            "llvm": "vfmsubadd.{0.data_type_short}{0.width_suffix}",
+            "ret": "f(32-64)",
+            "args": ["0", "0", "0"]
+        },
+        {
+            "intrinsic": "{0.width_mm}_fnmadd_{0.data_type}",
+            "width": [128, 256],
+            "llvm": "vfnmadd.{0.data_type_short}{0.width_suffix}",
+            "ret": "f(32-64)",
+            "args": ["0", "0", "0"]
+        },
+        {
+            "intrinsic": "{0.width_mm}_fnmsub_{0.data_type}",
+            "width": [128, 256],
+            "llvm": "vfnmsub.{0.data_type_short}{0.width_suffix}",
+            "ret": "f(32-64)",
+            "args": ["0", "0", "0"]
+        }
+    ]
+}
diff --git a/src/librustc_platform_intrinsics/x86.rs b/src/librustc_platform_intrinsics/x86.rs
index 4a9b9970caf..168ae79ab74 100644
--- a/src/librustc_platform_intrinsics/x86.rs
+++ b/src/librustc_platform_intrinsics/x86.rs
@@ -1108,6 +1108,126 @@ pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {
             output: v(u(16), 16),
             definition: Named("llvm.x86.avx2.psubus.w")
         },
+        "_fmadd_ps" => Intrinsic {
+            inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
+            output: v(f(32), 4),
+            definition: Named("llvm.x86.fma.vfmadd.ps")
+        },
+        "_fmadd_pd" => Intrinsic {
+            inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
+            output: v(f(64), 2),
+            definition: Named("llvm.x86.fma.vfmadd.pd")
+        },
+        "256_fmadd_ps" => Intrinsic {
+            inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
+            output: v(f(32), 8),
+            definition: Named("llvm.x86.fma.vfmadd.ps.256")
+        },
+        "256_fmadd_pd" => Intrinsic {
+            inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
+            output: v(f(64), 4),
+            definition: Named("llvm.x86.fma.vfmadd.pd.256")
+        },
+        "_fmaddsub_ps" => Intrinsic {
+            inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
+            output: v(f(32), 4),
+            definition: Named("llvm.x86.fma.vfmaddsub.ps")
+        },
+        "_fmaddsub_pd" => Intrinsic {
+            inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
+            output: v(f(64), 2),
+            definition: Named("llvm.x86.fma.vfmaddsub.pd")
+        },
+        "256_fmaddsub_ps" => Intrinsic {
+            inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
+            output: v(f(32), 8),
+            definition: Named("llvm.x86.fma.vfmaddsub.ps.256")
+        },
+        "256_fmaddsub_pd" => Intrinsic {
+            inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
+            output: v(f(64), 4),
+            definition: Named("llvm.x86.fma.vfmaddsub.pd.256")
+        },
+        "_fmsub_ps" => Intrinsic {
+            inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
+            output: v(f(32), 4),
+            definition: Named("llvm.x86.fma.vfmsub.ps")
+        },
+        "_fmsub_pd" => Intrinsic {
+            inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
+            output: v(f(64), 2),
+            definition: Named("llvm.x86.fma.vfmsub.pd")
+        },
+        "256_fmsub_ps" => Intrinsic {
+            inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
+            output: v(f(32), 8),
+            definition: Named("llvm.x86.fma.vfmsub.ps.256")
+        },
+        "256_fmsub_pd" => Intrinsic {
+            inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
+            output: v(f(64), 4),
+            definition: Named("llvm.x86.fma.vfmsub.pd.256")
+        },
+        "_fmsubadd_ps" => Intrinsic {
+            inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
+            output: v(f(32), 4),
+            definition: Named("llvm.x86.fma.vfmsubadd.ps")
+        },
+        "_fmsubadd_pd" => Intrinsic {
+            inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
+            output: v(f(64), 2),
+            definition: Named("llvm.x86.fma.vfmsubadd.pd")
+        },
+        "256_fmsubadd_ps" => Intrinsic {
+            inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
+            output: v(f(32), 8),
+            definition: Named("llvm.x86.fma.vfmsubadd.ps.256")
+        },
+        "256_fmsubadd_pd" => Intrinsic {
+            inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
+            output: v(f(64), 4),
+            definition: Named("llvm.x86.fma.vfmsubadd.pd.256")
+        },
+        "_fnmadd_ps" => Intrinsic {
+            inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
+            output: v(f(32), 4),
+            definition: Named("llvm.x86.fma.vfnmadd.ps")
+        },
+        "_fnmadd_pd" => Intrinsic {
+            inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
+            output: v(f(64), 2),
+            definition: Named("llvm.x86.fma.vfnmadd.pd")
+        },
+        "256_fnmadd_ps" => Intrinsic {
+            inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
+            output: v(f(32), 8),
+            definition: Named("llvm.x86.fma.vfnmadd.ps.256")
+        },
+        "256_fnmadd_pd" => Intrinsic {
+            inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
+            output: v(f(64), 4),
+            definition: Named("llvm.x86.fma.vfnmadd.pd.256")
+        },
+        "_fnmsub_ps" => Intrinsic {
+            inputs: vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)],
+            output: v(f(32), 4),
+            definition: Named("llvm.x86.fma.vfnmsub.ps")
+        },
+        "_fnmsub_pd" => Intrinsic {
+            inputs: vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)],
+            output: v(f(64), 2),
+            definition: Named("llvm.x86.fma.vfnmsub.pd")
+        },
+        "256_fnmsub_ps" => Intrinsic {
+            inputs: vec![v(f(32), 8), v(f(32), 8), v(f(32), 8)],
+            output: v(f(32), 8),
+            definition: Named("llvm.x86.fma.vfnmsub.ps.256")
+        },
+        "256_fnmsub_pd" => Intrinsic {
+            inputs: vec![v(f(64), 4), v(f(64), 4), v(f(64), 4)],
+            output: v(f(64), 4),
+            definition: Named("llvm.x86.fma.vfnmsub.pd.256")
+        },
         _ => return None,
     })
 }