diff options
| author | gnzlbg <gonzalobg88@gmail.com> | 2018-03-13 16:46:55 +0100 |
|---|---|---|
| committer | gnzlbg <gonzalobg88@gmail.com> | 2018-03-13 16:47:48 +0100 |
| commit | 01cc5b3e195bb01088fdd59638f0d8c6d0a78142 (patch) | |
| tree | 417854020d43543e214bd8695a29114537fdae12 | |
| parent | e5acb0c8f69d615b46c8e3eed4c84c3abd6fd0cd (diff) | |
| download | rust-01cc5b3e195bb01088fdd59638f0d8c6d0a78142.tar.gz rust-01cc5b3e195bb01088fdd59638f0d8c6d0a78142.zip | |
add intrinsics for portable packed simd vector reductions
| -rw-r--r-- | src/librustc_llvm/ffi.rs | 40 | ||||
| -rw-r--r-- | src/librustc_trans/builder.rs | 75 | ||||
| -rw-r--r-- | src/librustc_trans/intrinsic.rs | 219 | ||||
| -rw-r--r-- | src/librustc_typeck/check/intrinsic.rs | 5 | ||||
| -rw-r--r-- | src/rustllvm/RustWrapper.cpp | 46 | ||||
| -rw-r--r-- | src/test/run-pass/simd-intrinsic-generic-reduction.rs | 143 |
6 files changed, 525 insertions, 3 deletions
diff --git a/src/librustc_llvm/ffi.rs b/src/librustc_llvm/ffi.rs
index 0ec5700f5f3..00547017349 100644
--- a/src/librustc_llvm/ffi.rs
+++ b/src/librustc_llvm/ffi.rs
@@ -1201,6 +1201,46 @@ extern "C" {
                                 Name: *const c_char)
                                 -> ValueRef;
 
+    pub fn LLVMRustBuildVectorReduceFAdd(B: BuilderRef,
+                                         Acc: ValueRef,
+                                         Src: ValueRef)
+                                         -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceFMul(B: BuilderRef,
+                                         Acc: ValueRef,
+                                         Src: ValueRef)
+                                         -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceAdd(B: BuilderRef,
+                                        Src: ValueRef)
+                                        -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceMul(B: BuilderRef,
+                                        Src: ValueRef)
+                                        -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceAnd(B: BuilderRef,
+                                        Src: ValueRef)
+                                        -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceOr(B: BuilderRef,
+                                       Src: ValueRef)
+                                       -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceXor(B: BuilderRef,
+                                        Src: ValueRef)
+                                        -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceMin(B: BuilderRef,
+                                        Src: ValueRef,
+                                        IsSigned: bool)
+                                        -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceMax(B: BuilderRef,
+                                        Src: ValueRef,
+                                        IsSigned: bool)
+                                        -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceFMin(B: BuilderRef,
+                                         Src: ValueRef,
+                                         NoNaN: bool)
+                                         -> ValueRef;
+    pub fn LLVMRustBuildVectorReduceFMax(B: BuilderRef,
+                                         Src: ValueRef,
+                                         NoNaN: bool)
+                                         -> ValueRef;
+
     pub fn LLVMBuildIsNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
     pub fn LLVMBuildIsNotNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
     pub fn LLVMBuildPtrDiff(B: BuilderRef,
diff --git a/src/librustc_trans/builder.rs b/src/librustc_trans/builder.rs
index d4e05a18e3a..2c38197d68e 100644
--- a/src/librustc_trans/builder.rs
+++ b/src/librustc_trans/builder.rs
@@ -955,6 +955,81 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
         }
     }
 
+    pub fn vector_reduce_fadd_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
+        self.count_insn("vector.reduce.fadd_fast");
+        unsafe {
+            let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+    pub fn vector_reduce_fmul_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
+        self.count_insn("vector.reduce.fmul_fast");
+        unsafe {
+            let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+    pub fn vector_reduce_add(&self, src: ValueRef) -> ValueRef {
+        self.count_insn("vector.reduce.add");
+        unsafe {
+            llvm::LLVMRustBuildVectorReduceAdd(self.llbuilder, src)
+        }
+    }
+    pub fn vector_reduce_mul(&self, src: ValueRef) -> ValueRef {
+        self.count_insn("vector.reduce.mul");
+        unsafe {
+            llvm::LLVMRustBuildVectorReduceMul(self.llbuilder, src)
+        }
+    }
+    pub fn vector_reduce_and(&self, src: ValueRef) -> ValueRef {
+        self.count_insn("vector.reduce.and");
+        unsafe {
+            llvm::LLVMRustBuildVectorReduceAnd(self.llbuilder, src)
+        }
+    }
+    pub fn vector_reduce_or(&self, src: ValueRef) -> ValueRef {
+        self.count_insn("vector.reduce.or");
+        unsafe {
+            llvm::LLVMRustBuildVectorReduceOr(self.llbuilder, src)
+        }
+    }
+    pub fn vector_reduce_xor(&self, src: ValueRef) -> ValueRef {
+        self.count_insn("vector.reduce.xor");
+        unsafe {
+            llvm::LLVMRustBuildVectorReduceXor(self.llbuilder, src)
+        }
+    }
+    pub fn vector_reduce_fmin_fast(&self, src: ValueRef) -> ValueRef {
+        self.count_insn("vector.reduce.fmin_fast");
+        unsafe {
+            let instr = llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, false);
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+    pub fn vector_reduce_fmax_fast(&self, src: ValueRef) -> ValueRef {
+        self.count_insn("vector.reduce.fmax_fast");
+        unsafe {
+            let instr = llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, false);
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+    pub fn vector_reduce_min(&self, src: ValueRef, is_signed: bool) -> ValueRef {
+        self.count_insn("vector.reduce.min");
+        unsafe {
+            llvm::LLVMRustBuildVectorReduceMin(self.llbuilder, src, is_signed)
+        }
+    }
+    pub fn vector_reduce_max(&self, src: ValueRef, is_signed: bool) -> ValueRef {
+        self.count_insn("vector.reduce.max");
+        unsafe {
+            llvm::LLVMRustBuildVectorReduceMax(self.llbuilder, src, is_signed)
+        }
+    }
+
     pub fn extract_value(&self, agg_val: ValueRef, idx: u64) -> ValueRef {
         self.count_insn("extractvalue");
         assert_eq!(idx as c_uint as u64, idx);
diff --git a/src/librustc_trans/intrinsic.rs b/src/librustc_trans/intrinsic.rs
index 3f87ce7e047..011273f02e1 100644
--- a/src/librustc_trans/intrinsic.rs
+++ b/src/librustc_trans/intrinsic.rs
@@ -1018,14 +1018,22 @@ fn generic_simd_intrinsic<'a, 'tcx>(
                          name, $($fmt)*));
         }
     }
-    macro_rules! require {
-        ($cond: expr, $($fmt: tt)*) => {
-            if !$cond {
+    macro_rules! return_error {
+        ($($fmt: tt)*) => {
+            {
                 emit_error!($($fmt)*);
                 return Err(());
             }
         }
     }
+
+    macro_rules! require {
+        ($cond: expr, $($fmt: tt)*) => {
+            if !$cond {
+                return_error!($($fmt)*);
+            }
+        };
+    }
     macro_rules! require_simd {
         ($ty: expr, $position: expr) => {
             require!($ty.is_simd(), "expected SIMD {} type, found non-SIMD `{}`", $position, $ty)
@@ -1142,6 +1150,211 @@ fn generic_simd_intrinsic<'a, 'tcx>(
         return Ok(bx.extract_element(args[0].immediate(), args[1].immediate()))
     }
 
+    if name == "simd_reduce_add" {
+        require!(ret_ty == in_elem,
+                 "expected return type `{}` (element of input `{}`), found `{}`",
+                 in_elem, in_ty, ret_ty);
+        return match in_elem.sty {
+            ty::TyInt(_i) => {
+                Ok(bx.vector_reduce_add(args[0].immediate()))
+            },
+            ty::TyUint(_u) => {
+                Ok(bx.vector_reduce_add(args[0].immediate()))
+            },
+            ty::TyFloat(f) => {
+                // undef as accumulator makes the reduction unordered:
+                let acc = match f.bit_width() {
+                    32 => C_undef(Type::f32(bx.cx)),
+                    64 => C_undef(Type::f64(bx.cx)),
+                    v => {
+                        return_error!(
+                            "unsupported {} from `{}` with element `{}` of size `{}` to `{}`",
+                            "simd_reduce_add", in_ty, in_elem, v, ret_ty)
+                    }
+                };
+                Ok(bx.vector_reduce_fadd_fast(acc, args[0].immediate()))
+            }
+            _ => {
+                return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+                              "simd_reduce_add", in_ty, in_elem, ret_ty)
+            },
+        }
+    }
+
+    if name == "simd_reduce_mul" {
+        require!(ret_ty == in_elem,
+                 "expected return type `{}` (element of input `{}`), found `{}`",
+                 in_elem, in_ty, ret_ty);
+        return match in_elem.sty {
+            ty::TyInt(_i) => {
+                Ok(bx.vector_reduce_mul(args[0].immediate()))
+            },
+            ty::TyUint(_u) => {
+                Ok(bx.vector_reduce_mul(args[0].immediate()))
+            },
+            ty::TyFloat(f) => {
+                // undef as accumulator makes the reduction unordered:
+                let acc = match f.bit_width() {
+                    32 => C_undef(Type::f32(bx.cx)),
+                    64 => C_undef(Type::f64(bx.cx)),
+                    v => {
+                        return_error!(
+                            "unsupported {} from `{}` with element `{}` of size `{}` to `{}`",
+                            "simd_reduce_mul", in_ty, in_elem, v, ret_ty)
+                    }
+                };
+                Ok(bx.vector_reduce_fmul_fast(acc, args[0].immediate()))
+            }
+            _ => {
+                return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+                              "simd_reduce_mul", in_ty, in_elem, ret_ty)
+            },
+        }
+    }
+
+    if name == "simd_reduce_min" {
+        require!(ret_ty == in_elem,
+                 "expected return type `{}` (element of input `{}`), found `{}`",
+                 in_elem, in_ty, ret_ty);
+        return match in_elem.sty {
+            ty::TyInt(_i) => {
+                Ok(bx.vector_reduce_min(args[0].immediate(), true))
+            },
+            ty::TyUint(_u) => {
+                Ok(bx.vector_reduce_min(args[0].immediate(), false))
+            },
+            ty::TyFloat(_f) => {
+                Ok(bx.vector_reduce_fmin_fast(args[0].immediate()))
+            }
+            _ => {
+                return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+                              "simd_reduce_min", in_ty, in_elem, ret_ty)
+            },
+        }
+    }
+
+    if name == "simd_reduce_max" {
+        require!(ret_ty == in_elem,
+                 "expected return type `{}` (element of input `{}`), found `{}`",
+                 in_elem, in_ty, ret_ty);
+        return match in_elem.sty {
+            ty::TyInt(_i) => {
+                Ok(bx.vector_reduce_max(args[0].immediate(), true))
+            },
+            ty::TyUint(_u) => {
+                Ok(bx.vector_reduce_max(args[0].immediate(), false))
+            },
+            ty::TyFloat(_f) => {
+                Ok(bx.vector_reduce_fmax_fast(args[0].immediate()))
+            }
+            _ => {
+                return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+                              "simd_reduce_max", in_ty, in_elem, ret_ty)
+            },
+        }
+    }
+
+    if name == "simd_reduce_and" {
+        require!(ret_ty == in_elem,
+                 "expected return type `{}` (element of input `{}`), found `{}`",
+                 in_elem, in_ty, ret_ty);
+        return match in_elem.sty {
+            ty::TyInt(_i) => {
+                Ok(bx.vector_reduce_and(args[0].immediate()))
+            },
+            ty::TyUint(_u) => {
+                Ok(bx.vector_reduce_and(args[0].immediate()))
+            },
+            _ => {
+                return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+                              "simd_reduce_and", in_ty, in_elem, ret_ty)
+            },
+        }
+    }
+
+    if name == "simd_reduce_or" {
+        require!(ret_ty == in_elem,
+                 "expected return type `{}` (element of input `{}`), found `{}`",
+                 in_elem, in_ty, ret_ty);
+        return match in_elem.sty {
+            ty::TyInt(_i) => {
+                Ok(bx.vector_reduce_or(args[0].immediate()))
+            },
+            ty::TyUint(_u) => {
+                Ok(bx.vector_reduce_or(args[0].immediate()))
+            },
+            _ => {
+                return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+                              "simd_reduce_or", in_ty, in_elem, ret_ty)
+            },
+        }
+    }
+
+    if name == "simd_reduce_xor" {
+        require!(ret_ty == in_elem,
+                 "expected return type `{}` (element of input `{}`), found `{}`",
+                 in_elem, in_ty, ret_ty);
+        return match in_elem.sty {
+            ty::TyInt(_i) => {
+                Ok(bx.vector_reduce_xor(args[0].immediate()))
+            },
+            ty::TyUint(_u) => {
+                Ok(bx.vector_reduce_xor(args[0].immediate()))
+            },
+            _ => {
+                return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+                              "simd_reduce_xor", in_ty, in_elem, ret_ty)
+            },
+        }
+    }
+
+    if name == "simd_reduce_all" {
+        //require!(ret_ty == in_elem,
+        //         "expected return type `{}` (element of input `{}`), found `{}`",
+        //         in_elem, in_ty, ret_ty);
+        let i1 = Type::i1(bx.cx);
+        let i1xn = Type::vector(&i1, in_len as u64);
+        let v = bx.trunc(args[0].immediate(), i1xn);
+
+        let red = match in_elem.sty {
+            ty::TyInt(_i) => {
+                bx.vector_reduce_and(v)
+            },
+            ty::TyUint(_u) => {
+                bx.vector_reduce_and(v)
+            },
+            _ => {
+                return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+                              "simd_reduce_all", in_ty, in_elem, ret_ty)
+            },
+        };
+        return Ok(bx.zext(red, Type::bool(bx.cx)));
+    }
+
+    if name == "simd_reduce_any" {
+        //require!(ret_ty == in_elem,
+        //         "expected return type `{}` (element of input `{}`), found `{}`",
+        //         in_elem, in_ty, ret_ty);
+        let i1 = Type::i1(bx.cx);
+        let i1xn = Type::vector(&i1, in_len as u64);
+        let v = bx.trunc(args[0].immediate(), i1xn);
+
+        let red = match in_elem.sty {
+            ty::TyInt(_i) => {
+                bx.vector_reduce_or(v)
+            },
+            ty::TyUint(_u) => {
+                bx.vector_reduce_or(v)
+            },
+            _ => {
+                return_error!("unsupported {} from `{}` with element `{}` to `{}`",
+                              "simd_reduce_any", in_ty, in_elem, ret_ty)
+            },
+        };
+        return Ok(bx.zext(red, Type::bool(bx.cx)));
+    }
+
+
     if name == "simd_cast" {
         require_simd!(ret_ty, "return");
         let out_len = ret_ty.simd_size(tcx);
diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs
index 2e00040d99a..f2d01c57f29 100644
--- a/src/librustc_typeck/check/intrinsic.rs
+++ b/src/librustc_typeck/check/intrinsic.rs
@@ -361,6 +361,11 @@ pub fn check_platform_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
         "simd_insert" => (2, vec![param(0), tcx.types.u32, param(1)], param(0)),
         "simd_extract" => (2, vec![param(0), tcx.types.u32], param(1)),
         "simd_cast" => (2, vec![param(0)], param(1)),
+        "simd_reduce_all" | "simd_reduce_any" => (1, vec![param(0)], tcx.types.bool),
+        "simd_reduce_add" | "simd_reduce_mul" |
+        "simd_reduce_and" | "simd_reduce_or" | "simd_reduce_xor" |
+        "simd_reduce_min" | "simd_reduce_max"
+            => (2, vec![param(0)], param(1)),
         name if name.starts_with("simd_shuffle") => {
             match name["simd_shuffle".len()..].parse() {
                 Ok(n) => {
diff --git a/src/rustllvm/RustWrapper.cpp b/src/rustllvm/RustWrapper.cpp
index 27d5496f576..e749549201e 100644
--- a/src/rustllvm/RustWrapper.cpp
+++ b/src/rustllvm/RustWrapper.cpp
@@ -1395,3 +1395,49 @@ LLVMRustModuleCost(LLVMModuleRef M) {
   auto f = unwrap(M)->functions();
   return std::distance(std::begin(f), std::end(f));
 }
+
+// Vector reductions:
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceFAdd(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) {
+    return wrap(unwrap(B)->CreateFAddReduce(unwrap(Acc),unwrap(Src)));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceFMul(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) {
+    return wrap(unwrap(B)->CreateFMulReduce(unwrap(Acc),unwrap(Src)));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceAdd(LLVMBuilderRef B, LLVMValueRef Src) {
+    return wrap(unwrap(B)->CreateAddReduce(unwrap(Src)));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceMul(LLVMBuilderRef B, LLVMValueRef Src) {
+    return wrap(unwrap(B)->CreateMulReduce(unwrap(Src)));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceAnd(LLVMBuilderRef B, LLVMValueRef Src) {
+    return wrap(unwrap(B)->CreateAndReduce(unwrap(Src)));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceOr(LLVMBuilderRef B, LLVMValueRef Src) {
+    return wrap(unwrap(B)->CreateOrReduce(unwrap(Src)));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceXor(LLVMBuilderRef B, LLVMValueRef Src) {
+    return wrap(unwrap(B)->CreateXorReduce(unwrap(Src)));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceMin(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) {
+    return wrap(unwrap(B)->CreateIntMinReduce(unwrap(Src), IsSigned));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceMax(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) {
+    return wrap(unwrap(B)->CreateIntMaxReduce(unwrap(Src), IsSigned));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceFMin(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) {
+    return wrap(unwrap(B)->CreateFPMinReduce(unwrap(Src), NoNaN));
+}
+extern "C" LLVMValueRef
+LLVMRustBuildVectorReduceFMax(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) {
+    return wrap(unwrap(B)->CreateFPMaxReduce(unwrap(Src), NoNaN));
+}
diff --git a/src/test/run-pass/simd-intrinsic-generic-reduction.rs b/src/test/run-pass/simd-intrinsic-generic-reduction.rs
new file mode 100644
index 00000000000..15b291ae179
--- /dev/null
+++ b/src/test/run-pass/simd-intrinsic-generic-reduction.rs
@@ -0,0 +1,143 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Test that the simd_reduce_{op} intrinsics produce the correct results.
+
+#![feature(repr_simd, platform_intrinsics)]
+#![allow(non_camel_case_types)]
+
+#[repr(simd)]
+#[derive(Copy, Clone)]
+struct i32x4(pub i32, pub i32, pub i32, pub i32);
+
+#[repr(simd)]
+#[derive(Copy, Clone)]
+struct u32x4(pub u32, pub u32, pub u32, pub u32);
+
+#[repr(simd)]
+#[derive(Copy, Clone)]
+struct f32x4(pub f32, pub f32, pub f32, pub f32);
+
+#[repr(simd)]
+#[derive(Copy, Clone)]
+struct b8x4(pub i8, pub i8, pub i8, pub i8);
+
+#[repr(simd)]
+#[derive(Copy, Clone)]
+struct b8x16(
+    pub i8, pub i8, pub i8, pub i8,
+    pub i8, pub i8, pub i8, pub i8,
+    pub i8, pub i8, pub i8, pub i8,
+    pub i8, pub i8, pub i8, pub i8
+);
+
+extern "platform-intrinsic" {
+    fn simd_reduce_add<T, U>(x: T) -> U;
+    fn simd_reduce_mul<T, U>(x: T) -> U;
+    fn simd_reduce_min<T, U>(x: T) -> U;
+    fn simd_reduce_max<T, U>(x: T) -> U;
+    fn simd_reduce_and<T, U>(x: T) -> U;
+    fn simd_reduce_or<T, U>(x: T) -> U;
+    fn simd_reduce_xor<T, U>(x: T) -> U;
+    fn simd_reduce_all<T>(x: T) -> bool;
+    fn simd_reduce_any<T>(x: T) -> bool;
+}
+
+fn main() {
+    unsafe {
+        let x = i32x4(1, -2, 3, 4);
+        let r: i32 = simd_reduce_add(x);
+        assert!(r == 6_i32);
+        let r: i32 = simd_reduce_mul(x);
+        assert!(r == -24_i32);
+        let r: i32 = simd_reduce_min(x);
+        assert!(r == -2_i32);
+        let r: i32 = simd_reduce_max(x);
+        assert!(r == 4_i32);
+
+        let x = i32x4(-1, -1, -1, -1);
+        let r: i32 = simd_reduce_and(x);
+        assert!(r == -1_i32);
+        let r: i32 = simd_reduce_or(x);
+        assert!(r == -1_i32);
+        let r: i32 = simd_reduce_xor(x);
+        assert!(r == 0_i32);
+
+        let x = i32x4(-1, -1, 0, -1);
+        let r: i32 = simd_reduce_and(x);
+        assert!(r == 0_i32);
+        let r: i32 = simd_reduce_or(x);
+        assert!(r == -1_i32);
+        let r: i32 = simd_reduce_xor(x);
+        assert!(r == -1_i32);
+    }
+
+    unsafe {
+        let x = u32x4(1, 2, 3, 4);
+        let r: u32 = simd_reduce_add(x);
+        assert!(r == 10_u32);
+        let r: u32 = simd_reduce_mul(x);
+        assert!(r == 24_u32);
+        let r: u32 = simd_reduce_min(x);
+        assert!(r == 1_u32);
+        let r: u32 = simd_reduce_max(x);
+        assert!(r == 4_u32);
+
+        let t = u32::max_value();
+        let x = u32x4(t, t, t, t);
+        let r: u32 = simd_reduce_and(x);
+        assert!(r == t);
+        let r: u32 = simd_reduce_or(x);
+        assert!(r == t);
+        let r: u32 = simd_reduce_xor(x);
+        assert!(r == 0_u32);
+
+        let x = u32x4(t, t, 0, t);
+        let r: u32 = simd_reduce_and(x);
+        assert!(r == 0_u32);
+        let r: u32 = simd_reduce_or(x);
+        assert!(r == t);
+        let r: u32 = simd_reduce_xor(x);
+        assert!(r == t);
+    }
+
+    unsafe {
+        let x = f32x4(1., -2., 3., 4.);
+        let r: f32 = simd_reduce_add(x);
+        assert!(r == 6_f32);
+        let r: f32 = simd_reduce_mul(x);
+        assert!(r == -24_f32);
+        let r: f32 = simd_reduce_min(x);
+        assert!(r == -2_f32);
+        let r: f32 = simd_reduce_max(x);
+        assert!(r == 4_f32);
+    }
+
+    unsafe {
+        let x = b8x4(!0, !0, !0, !0);
+        let r: bool = simd_reduce_all(x);
+        //let r: bool = foobar(x);
+        assert!(r);
+        let r: bool = simd_reduce_any(x);
+        assert!(r);
+
+        let x = b8x4(!0, !0, 0, !0);
+        let r: bool = simd_reduce_all(x);
+        assert!(!r);
+        let r: bool = simd_reduce_any(x);
+        assert!(r);
+
+        let x = b8x4(0, 0, 0, 0);
+        let r: bool = simd_reduce_all(x);
+        assert!(!r);
+        let r: bool = simd_reduce_any(x);
+        assert!(!r);
+    }
+}
