diff options
| author | gnzlbg <gonzalobg88@gmail.com> | 2018-03-14 17:22:40 +0100 |
|---|---|---|
| committer | gnzlbg <gonzalobg88@gmail.com> | 2018-03-14 17:22:40 +0100 |
| commit | 07ce659dd03523b526cb4804b9ef882f31e9ecf3 (patch) | |
| tree | 59bfa5b8c609b58e320e827b79347fd466ffb797 | |
| parent | 01cc5b3e195bb01088fdd59638f0d8c6d0a78142 (diff) | |
| download | rust-07ce659dd03523b526cb4804b9ef882f31e9ecf3.tar.gz rust-07ce659dd03523b526cb4804b9ef882f31e9ecf3.zip | |
expose ordered/unordered/nanless intrinsics
| -rw-r--r-- | src/librustc_trans/builder.rs | 18 | ||||
| -rw-r--r-- | src/librustc_trans/intrinsic.rs | 300 | ||||
| -rw-r--r-- | src/librustc_typeck/check/intrinsic.rs | 7 | ||||
| -rw-r--r-- | src/rustllvm/RustWrapper.cpp | 2 | ||||
| -rw-r--r-- | src/test/run-pass/simd-intrinsic-generic-reduction.rs | 104 |
5 files changed, 202 insertions, 229 deletions
diff --git a/src/librustc_trans/builder.rs b/src/librustc_trans/builder.rs index 2c38197d68e..371f53013b9 100644 --- a/src/librustc_trans/builder.rs +++ b/src/librustc_trans/builder.rs @@ -958,6 +958,9 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { pub fn vector_reduce_fadd_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef { self.count_insn("vector.reduce.fadd_fast"); unsafe { + // FIXME: add a non-fast math version once + // https://bugs.llvm.org/show_bug.cgi?id=36732 + // is fixed. let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src); llvm::LLVMRustSetHasUnsafeAlgebra(instr); instr @@ -966,6 +969,9 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { pub fn vector_reduce_fmul_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef { self.count_insn("vector.reduce.fmul_fast"); unsafe { + // FIXME: add a non-fast math version once + // https://bugs.llvm.org/show_bug.cgi?id=36732 + // is fixed. let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src); llvm::LLVMRustSetHasUnsafeAlgebra(instr); instr @@ -1001,6 +1007,18 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { llvm::LLVMRustBuildVectorReduceXor(self.llbuilder, src) } } + pub fn vector_reduce_fmin(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.fmin"); + unsafe { + llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, true) + } + } + pub fn vector_reduce_fmax(&self, src: ValueRef) -> ValueRef { + self.count_insn("vector.reduce.fmax"); + unsafe { + llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, true) + } + } pub fn vector_reduce_fmin_fast(&self, src: ValueRef) -> ValueRef { self.count_insn("vector.reduce.fmin_fast"); unsafe { diff --git a/src/librustc_trans/intrinsic.rs b/src/librustc_trans/intrinsic.rs index 011273f02e1..8b62a1be80c 100644 --- a/src/librustc_trans/intrinsic.rs +++ b/src/librustc_trans/intrinsic.rs @@ -1150,210 +1150,134 @@ fn generic_simd_intrinsic<'a, 'tcx>( return Ok(bx.extract_element(args[0].immediate(), args[1].immediate())) } - if name == 
"simd_reduce_add" { - require!(ret_ty == in_elem, - "expected return type `{}` (element of input `{}`), found `{}`", - in_elem, in_ty, ret_ty); - return match in_elem.sty { - ty::TyInt(_i) => { - Ok(bx.vector_reduce_add(args[0].immediate())) - }, - ty::TyUint(_u) => { - Ok(bx.vector_reduce_add(args[0].immediate())) - }, - ty::TyFloat(f) => { - // undef as accumulator makes the reduction unordered: - let acc = match f.bit_width() { - 32 => C_undef(Type::f32(bx.cx)), - 64 => C_undef(Type::f64(bx.cx)), - v => { - return_error!( - "unsupported {} from `{}` with element `{}` of size `{}` to `{}`", - "simd_reduce_add", in_ty, in_elem, v, ret_ty) + macro_rules! arith_red { + ($name:tt : $integer_reduce:ident, $float_reduce:ident, $ordered:expr) => { + if name == $name { + require!(ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, in_ty, ret_ty); + return match in_elem.sty { + ty::TyInt(_) | ty::TyUint(_) => { + let r = bx.$integer_reduce(args[0].immediate()); + if $ordered { + // if overflow occurs, the result is the + // mathematical result modulo 2^n: + if name.contains("mul") { + Ok(bx.mul(args[1].immediate(), r)) + } else { + Ok(bx.add(args[1].immediate(), r)) + } + } else { + Ok(bx.$integer_reduce(args[0].immediate())) + } + }, + ty::TyFloat(f) => { + // ordered arithmetic reductions take an accumulator + let acc = if $ordered { + args[1].immediate() + } else { + // unordered arithmetic reductions do not: + match f.bit_width() { + 32 => C_undef(Type::f32(bx.cx)), + 64 => C_undef(Type::f64(bx.cx)), + v => { + return_error!( + "unsupported {} from `{}` with element `{}` of size `{}` to `{}`", + $name, in_ty, in_elem, v, ret_ty + ) + } + } + + }; + Ok(bx.$float_reduce(acc, args[0].immediate())) } - }; - Ok(bx.vector_reduce_fadd_fast(acc, args[0].immediate())) + _ => { + return_error!( + "unsupported {} from `{}` with element `{}` to `{}`", + $name, in_ty, in_elem, ret_ty + ) + }, + } } - _ => { - return_error!("unsupported 
{} from `{}` with element `{}` to `{}`", - "simd_reduce_add", in_ty, in_elem, ret_ty) - }, } } - if name == "simd_reduce_mul" { - require!(ret_ty == in_elem, - "expected return type `{}` (element of input `{}`), found `{}`", - in_elem, in_ty, ret_ty); - return match in_elem.sty { - ty::TyInt(_i) => { - Ok(bx.vector_reduce_mul(args[0].immediate())) - }, - ty::TyUint(_u) => { - Ok(bx.vector_reduce_mul(args[0].immediate())) - }, - ty::TyFloat(f) => { - // undef as accumulator makes the reduction unordered: - let acc = match f.bit_width() { - 32 => C_undef(Type::f32(bx.cx)), - 64 => C_undef(Type::f64(bx.cx)), - v => { - return_error!( - "unsupported {} from `{}` with element `{}` of size `{}` to `{}`", - "simd_reduce_mul", in_ty, in_elem, v, ret_ty) + arith_red!("simd_reduce_add_ordered": vector_reduce_add, vector_reduce_fadd_fast, true); + arith_red!("simd_reduce_mul_ordered": vector_reduce_mul, vector_reduce_fmul_fast, true); + arith_red!("simd_reduce_add_unordered": vector_reduce_add, vector_reduce_fadd_fast, false); + arith_red!("simd_reduce_mul_unordered": vector_reduce_mul, vector_reduce_fmul_fast, false); + + macro_rules! 
minmax_red { + ($name:tt: $int_red:ident, $float_red:ident) => { + if name == $name { + require!(ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, in_ty, ret_ty); + return match in_elem.sty { + ty::TyInt(_i) => { + Ok(bx.$int_red(args[0].immediate(), true)) + }, + ty::TyUint(_u) => { + Ok(bx.$int_red(args[0].immediate(), false)) + }, + ty::TyFloat(_f) => { + Ok(bx.$float_red(args[0].immediate())) } - }; - Ok(bx.vector_reduce_fmul_fast(acc, args[0].immediate())) + _ => { + return_error!("unsupported {} from `{}` with element `{}` to `{}`", + $name, in_ty, in_elem, ret_ty) + }, + } } - _ => { - return_error!("unsupported {} from `{}` with element `{}` to `{}`", - "simd_reduce_mul", in_ty, in_elem, ret_ty) - }, - } - } - if name == "simd_reduce_min" { - require!(ret_ty == in_elem, - "expected return type `{}` (element of input `{}`), found `{}`", - in_elem, in_ty, ret_ty); - return match in_elem.sty { - ty::TyInt(_i) => { - Ok(bx.vector_reduce_min(args[0].immediate(), true)) - }, - ty::TyUint(_u) => { - Ok(bx.vector_reduce_min(args[0].immediate(), false)) - }, - ty::TyFloat(_f) => { - Ok(bx.vector_reduce_fmin_fast(args[0].immediate())) - } - _ => { - return_error!("unsupported {} from `{}` with element `{}` to `{}`", - "simd_reduce_min", in_ty, in_elem, ret_ty) - }, } } - if name == "simd_reduce_max" { - require!(ret_ty == in_elem, - "expected return type `{}` (element of input `{}`), found `{}`", - in_elem, in_ty, ret_ty); - return match in_elem.sty { - ty::TyInt(_i) => { - Ok(bx.vector_reduce_max(args[0].immediate(), true)) - }, - ty::TyUint(_u) => { - Ok(bx.vector_reduce_max(args[0].immediate(), false)) - }, - ty::TyFloat(_f) => { - Ok(bx.vector_reduce_fmax_fast(args[0].immediate())) - } - _ => { - return_error!("unsupported {} from `{}` with element `{}` to `{}`", - "simd_reduce_max", in_ty, in_elem, ret_ty) - }, - } - } + minmax_red!("simd_reduce_min": vector_reduce_min, vector_reduce_fmin); + 
minmax_red!("simd_reduce_max": vector_reduce_max, vector_reduce_fmax); - if name == "simd_reduce_and" { - require!(ret_ty == in_elem, - "expected return type `{}` (element of input `{}`), found `{}`", - in_elem, in_ty, ret_ty); - return match in_elem.sty { - ty::TyInt(_i) => { - Ok(bx.vector_reduce_and(args[0].immediate())) - }, - ty::TyUint(_u) => { - Ok(bx.vector_reduce_and(args[0].immediate())) - }, - _ => { - return_error!("unsupported {} from `{}` with element `{}` to `{}`", - "simd_reduce_and", in_ty, in_elem, ret_ty) - }, - } - } + minmax_red!("simd_reduce_min_nanless": vector_reduce_min, vector_reduce_fmin_fast); + minmax_red!("simd_reduce_max_nanless": vector_reduce_max, vector_reduce_fmax_fast); - if name == "simd_reduce_or" { - require!(ret_ty == in_elem, - "expected return type `{}` (element of input `{}`), found `{}`", - in_elem, in_ty, ret_ty); - return match in_elem.sty { - ty::TyInt(_i) => { - Ok(bx.vector_reduce_or(args[0].immediate())) - }, - ty::TyUint(_u) => { - Ok(bx.vector_reduce_or(args[0].immediate())) - }, - _ => { - return_error!("unsupported {} from `{}` with element `{}` to `{}`", - "simd_reduce_or", in_ty, in_elem, ret_ty) - }, - } - } - - if name == "simd_reduce_xor" { - require!(ret_ty == in_elem, - "expected return type `{}` (element of input `{}`), found `{}`", - in_elem, in_ty, ret_ty); - return match in_elem.sty { - ty::TyInt(_i) => { - Ok(bx.vector_reduce_xor(args[0].immediate())) - }, - ty::TyUint(_u) => { - Ok(bx.vector_reduce_xor(args[0].immediate())) - }, - _ => { - return_error!("unsupported {} from `{}` with element `{}` to `{}`", - "simd_reduce_xor", in_ty, in_elem, ret_ty) - }, + macro_rules! 
bitwise_red { + ($name:tt : $red:ident, $boolean:expr) => { + if name == $name { + let input = if !$boolean { + require!(ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, in_ty, ret_ty); + args[0].immediate() + } else { + // boolean reductions operate on vectors of i1s: + let i1 = Type::i1(bx.cx); + let i1xn = Type::vector(&i1, in_len as u64); + bx.trunc(args[0].immediate(), i1xn) + }; + return match in_elem.sty { + ty::TyInt(_) | ty::TyUint(_) => { + let r = bx.$red(input); + Ok( + if !$boolean { + r + } else { + bx.zext(r, Type::bool(bx.cx)) + } + ) + }, + _ => { + return_error!("unsupported {} from `{}` with element `{}` to `{}`", + $name, in_ty, in_elem, ret_ty) + }, + } + } } } - if name == "simd_reduce_all" { - //require!(ret_ty == in_elem, - // "expected return type `{}` (element of input `{}`), found `{}`", - // in_elem, in_ty, ret_ty); - let i1 = Type::i1(bx.cx); - let i1xn = Type::vector(&i1, in_len as u64); - let v = bx.trunc(args[0].immediate(), i1xn); - - let red = match in_elem.sty { - ty::TyInt(_i) => { - bx.vector_reduce_and(v) - }, - ty::TyUint(_u) => { - bx.vector_reduce_and(v) - }, - _ => { - return_error!("unsupported {} from `{}` with element `{}` to `{}`", - "simd_reduce_and", in_ty, in_elem, ret_ty) - }, - }; - return Ok(bx.zext(red, Type::bool(bx.cx))); - } - - if name == "simd_reduce_any" { - //require!(ret_ty == in_elem, - // "expected return type `{}` (element of input `{}`), found `{}`", - // in_elem, in_ty, ret_ty); - let i1 = Type::i1(bx.cx); - let i1xn = Type::vector(&i1, in_len as u64); - let v = bx.trunc(args[0].immediate(), i1xn); - - let red = match in_elem.sty { - ty::TyInt(_i) => { - bx.vector_reduce_or(v) - }, - ty::TyUint(_u) => { - bx.vector_reduce_or(v) - }, - _ => { - return_error!("unsupported {} from `{}` with element `{}` to `{}`", - "simd_reduce_and", in_ty, in_elem, ret_ty) - }, - }; - return Ok(bx.zext(red, Type::bool(bx.cx))); - } - + bitwise_red!("simd_reduce_and": 
vector_reduce_and, false); + bitwise_red!("simd_reduce_or": vector_reduce_or, false); + bitwise_red!("simd_reduce_xor": vector_reduce_xor, false); + bitwise_red!("simd_reduce_all": vector_reduce_and, true); + bitwise_red!("simd_reduce_any": vector_reduce_or, true); if name == "simd_cast" { require_simd!(ret_ty, "return"); diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs index f2d01c57f29..b87b8aa0bdb 100644 --- a/src/librustc_typeck/check/intrinsic.rs +++ b/src/librustc_typeck/check/intrinsic.rs @@ -362,9 +362,12 @@ pub fn check_platform_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, "simd_extract" => (2, vec![param(0), tcx.types.u32], param(1)), "simd_cast" => (2, vec![param(0)], param(1)), "simd_reduce_all" | "simd_reduce_any" => (1, vec![param(0)], tcx.types.bool), - "simd_reduce_add" | "simd_reduce_mul" | + "simd_reduce_add_ordered" | "simd_reduce_mul_ordered" + => (2, vec![param(0), param(1)], param(1)), + "simd_reduce_add_unordered" | "simd_reduce_mul_unordered" | "simd_reduce_and" | "simd_reduce_or" | "simd_reduce_xor" | - "simd_reduce_min" | "simd_reduce_max" + "simd_reduce_min" | "simd_reduce_max" | + "simd_reduce_min_nanless" | "simd_reduce_max_nanless" => (2, vec![param(0)], param(1)), name if name.starts_with("simd_shuffle") => { match name["simd_shuffle".len()..].parse() { diff --git a/src/rustllvm/RustWrapper.cpp b/src/rustllvm/RustWrapper.cpp index e749549201e..9d5f9042f18 100644 --- a/src/rustllvm/RustWrapper.cpp +++ b/src/rustllvm/RustWrapper.cpp @@ -1397,6 +1397,7 @@ LLVMRustModuleCost(LLVMModuleRef M) { } // Vector reductions: +#if LLVM_VERSION_GE(6, 0) extern "C" LLVMValueRef LLVMRustBuildVectorReduceFAdd(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) { return wrap(unwrap(B)->CreateFAddReduce(unwrap(Acc),unwrap(Src))); @@ -1441,3 +1442,4 @@ extern "C" LLVMValueRef LLVMRustBuildVectorReduceFMax(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) { return 
wrap(unwrap(B)->CreateFPMaxReduce(unwrap(Src), NoNaN)); } +#endif diff --git a/src/test/run-pass/simd-intrinsic-generic-reduction.rs b/src/test/run-pass/simd-intrinsic-generic-reduction.rs index 15b291ae179..6755c92961b 100644 --- a/src/test/run-pass/simd-intrinsic-generic-reduction.rs +++ b/src/test/run-pass/simd-intrinsic-generic-reduction.rs @@ -39,10 +39,14 @@ struct b8x16( ); extern "platform-intrinsic" { - fn simd_reduce_add<T, U>(x: T) -> U; - fn simd_reduce_mul<T, U>(x: T) -> U; + fn simd_reduce_add_unordered<T, U>(x: T) -> U; + fn simd_reduce_mul_unordered<T, U>(x: T) -> U; + fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U; + fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U; fn simd_reduce_min<T, U>(x: T) -> U; fn simd_reduce_max<T, U>(x: T) -> U; + fn simd_reduce_min_nanless<T, U>(x: T) -> U; + fn simd_reduce_max_nanless<T, U>(x: T) -> U; fn simd_reduce_and<T, U>(x: T) -> U; fn simd_reduce_or<T, U>(x: T) -> U; fn simd_reduce_xor<T, U>(x: T) -> U; @@ -53,91 +57,113 @@ extern "platform-intrinsic" { fn main() { unsafe { let x = i32x4(1, -2, 3, 4); - let r: i32 = simd_reduce_add(x); - assert!(r == 6_i32); - let r: i32 = simd_reduce_mul(x); - assert!(r == -24_i32); + let r: i32 = simd_reduce_add_unordered(x); + assert_eq!(r, 6_i32); + let r: i32 = simd_reduce_mul_unordered(x); + assert_eq!(r, -24_i32); + let r: i32 = simd_reduce_add_ordered(x, -1); + assert_eq!(r, 5_i32); + let r: i32 = simd_reduce_mul_ordered(x, -1); + assert_eq!(r, 24_i32); + let r: i32 = simd_reduce_min(x); - assert!(r == -21_i32); + assert_eq!(r, -2_i32); let r: i32 = simd_reduce_max(x); - assert!(r == 4_i32); + assert_eq!(r, 4_i32); let x = i32x4(-1, -1, -1, -1); let r: i32 = simd_reduce_and(x); - assert!(r == -1_i32); + assert_eq!(r, -1_i32); let r: i32 = simd_reduce_or(x); - assert!(r == -1_i32); + assert_eq!(r, -1_i32); let r: i32 = simd_reduce_xor(x); - assert!(r == 0_i32); + assert_eq!(r, 0_i32); let x = i32x4(-1, -1, 0, -1); let r: i32 = simd_reduce_and(x); - assert!(r == 
0_i32); + assert_eq!(r, 0_i32); let r: i32 = simd_reduce_or(x); - assert!(r == -1_i32); + assert_eq!(r, -1_i32); let r: i32 = simd_reduce_xor(x); - assert!(r == -1_i32); + assert_eq!(r, -1_i32); } unsafe { let x = u32x4(1, 2, 3, 4); - let r: u32 = simd_reduce_add(x); - assert!(r == 10_u32); - let r: u32 = simd_reduce_mul(x); - assert!(r == 24_u32); + let r: u32 = simd_reduce_add_unordered(x); + assert_eq!(r, 10_u32); + let r: u32 = simd_reduce_mul_unordered(x); + assert_eq!(r, 24_u32); + let r: u32 = simd_reduce_add_ordered(x, 1); + assert_eq!(r, 11_u32); + let r: u32 = simd_reduce_mul_ordered(x, 2); + assert_eq!(r, 48_u32); + let r: u32 = simd_reduce_min(x); - assert!(r == 1_u32); + assert_eq!(r, 1_u32); let r: u32 = simd_reduce_max(x); - assert!(r == 4_u32); + assert_eq!(r, 4_u32); let t = u32::max_value(); let x = u32x4(t, t, t, t); let r: u32 = simd_reduce_and(x); - assert!(r == t); + assert_eq!(r, t); let r: u32 = simd_reduce_or(x); - assert!(r == t); + assert_eq!(r, t); let r: u32 = simd_reduce_xor(x); - assert!(r == 0_u32); + assert_eq!(r, 0_u32); let x = u32x4(t, t, 0, t); let r: u32 = simd_reduce_and(x); - assert!(r == 0_u32); + assert_eq!(r, 0_u32); let r: u32 = simd_reduce_or(x); - assert!(r == t); + assert_eq!(r, t); let r: u32 = simd_reduce_xor(x); - assert!(r == t); + assert_eq!(r, t); } unsafe { let x = f32x4(1., -2., 3., 4.); - let r: f32 = simd_reduce_add(x); - assert!(r == 6_f32); - let r: f32 = simd_reduce_mul(x); - assert!(r == -24_f32); + let r: f32 = simd_reduce_add_unordered(x); + assert_eq!(r, 6_f32); + let r: f32 = simd_reduce_mul_unordered(x); + assert_eq!(r, -24_f32); + // FIXME: only works correctly for accumulator, 0: + // https://bugs.llvm.org/show_bug.cgi?id=36734 + let r: f32 = simd_reduce_add_ordered(x, 0.); + assert_eq!(r, 6_f32); + // FIXME: only works correctly for accumulator, 1: + // https://bugs.llvm.org/show_bug.cgi?id=36734 + let r: f32 = simd_reduce_mul_ordered(x, 1.); + assert_eq!(r, -24_f32); + let r: f32 = 
simd_reduce_min(x); - assert!(r == -2_f32); + assert_eq!(r, -2_f32); let r: f32 = simd_reduce_max(x); - assert!(r == 4_f32); + assert_eq!(r, 4_f32); + let r: f32 = simd_reduce_min_nanless(x); + assert_eq!(r, -2_f32); + let r: f32 = simd_reduce_max_nanless(x); + assert_eq!(r, 4_f32); } unsafe { let x = b8x4(!0, !0, !0, !0); let r: bool = simd_reduce_all(x); - //let r: bool = foobar(x); - assert!(r); + assert_eq!(r, true); let r: bool = simd_reduce_any(x); - assert!(r); + assert_eq!(r, true); let x = b8x4(!0, !0, 0, !0); let r: bool = simd_reduce_all(x); - assert!(!r); + assert_eq!(r, false); let r: bool = simd_reduce_any(x); - assert!(r); + assert_eq!(r, true); let x = b8x4(0, 0, 0, 0); let r: bool = simd_reduce_all(x); - assert!(!r); + assert_eq!(r, false); let r: bool = simd_reduce_any(x); - assert!(!r); + assert_eq!(r, false); } } |
