diff options
| author | Huon Wilson <dbau.pp+github@gmail.com> | 2014-11-25 18:09:35 +1100 |
|---|---|---|
| committer | Huon Wilson <dbau.pp+github@gmail.com> | 2014-12-03 17:15:02 -0800 |
| commit | 1a620661b7ef7bbee10e56895e6ee081504b7378 (patch) | |
| tree | 795b852f9c2c4dd5cfc1f4491faa580f6e199b54 | |
| parent | 5f9741e62d7cfe26ca94a28716a95bc03d74e87a (diff) | |
| download | rust-1a620661b7ef7bbee10e56895e6ee081504b7378.tar.gz rust-1a620661b7ef7bbee10e56895e6ee081504b7378.zip | |
Special-case transmute for primitive, SIMD & pointer types.
This detects (a subset of) the cases when `transmute::<T, U>(x)` can be
lowered to a direct `bitcast T x to U` in LLVM. This assists with
efficiently handling a SIMD vector as multiple different types,
e.g. swapping bytes/words/double words around inside some larger vector
type.
C compilers like GCC and Clang handle integer vector types as `__m128i`
for all widths, and implicitly insert bitcasts as required. This patch
allows Rust to express this, even if it takes a bit of `unsafe`, whereas
previously it was impossible to do at all without inline assembly.
Example:
    pub fn reverse_u32s(u: u64x2) -> u64x2 {
        unsafe {
            let tmp = mem::transmute::<_, u32x4>(u);
            let swapped = u32x4(tmp.3, tmp.2, tmp.1, tmp.0);
            mem::transmute::<_, u64x2>(swapped)
        }
    }
Compiling with `--opt-level=3` gives:
Before
define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
entry-block:
%1 = bitcast <2 x i64> %0 to i128
%u.0.extract.trunc = trunc i128 %1 to i32
%u.4.extract.shift = lshr i128 %1, 32
%u.4.extract.trunc = trunc i128 %u.4.extract.shift to i32
%u.8.extract.shift = lshr i128 %1, 64
%u.8.extract.trunc = trunc i128 %u.8.extract.shift to i32
%u.12.extract.shift = lshr i128 %1, 96
%u.12.extract.trunc = trunc i128 %u.12.extract.shift to i32
%2 = insertelement <4 x i32> undef, i32 %u.12.extract.trunc, i64 0
%3 = insertelement <4 x i32> %2, i32 %u.8.extract.trunc, i64 1
%4 = insertelement <4 x i32> %3, i32 %u.4.extract.trunc, i64 2
%5 = insertelement <4 x i32> %4, i32 %u.0.extract.trunc, i64 3
%6 = bitcast <4 x i32> %5 to <2 x i64>
ret <2 x i64> %6
}
_ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
.cfi_startproc
movd %xmm0, %rax
punpckhqdq %xmm0, %xmm0
movd %xmm0, %rcx
movq %rcx, %rdx
shrq $32, %rdx
movq %rax, %rsi
shrq $32, %rsi
movd %eax, %xmm0
movd %ecx, %xmm1
punpckldq %xmm0, %xmm1
movd %esi, %xmm2
movd %edx, %xmm0
punpckldq %xmm2, %xmm0
punpckldq %xmm1, %xmm0
retq
After
define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
entry-block:
%1 = bitcast <2 x i64> %0 to <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%3 = bitcast <4 x i32> %2 to <2 x i64>
ret <2 x i64> %3
}
_ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
.cfi_startproc
pshufd $27, %xmm0, %xmm0
retq
| -rw-r--r-- | src/librustc_trans/trans/intrinsic.rs | 65 |
1 file changed, 59 insertions, 6 deletions
diff --git a/src/librustc_trans/trans/intrinsic.rs b/src/librustc_trans/trans/intrinsic.rs index 6bdb35f8d60..890652401d7 100644 --- a/src/librustc_trans/trans/intrinsic.rs +++ b/src/librustc_trans/trans/intrinsic.rs @@ -11,7 +11,7 @@ #![allow(non_upper_case_globals)] use llvm; -use llvm::{SequentiallyConsistent, Acquire, Release, AtomicXchg, ValueRef}; +use llvm::{SequentiallyConsistent, Acquire, Release, AtomicXchg, ValueRef, TypeKind}; use middle::subst; use middle::subst::FnSpace; use trans::base::*; @@ -174,12 +174,65 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, // This should be caught by the intrinsicck pass assert_eq!(in_type_size, out_type_size); - // We need to cast the dest so the types work out - let dest = match dest { - expr::SaveIn(d) => expr::SaveIn(PointerCast(bcx, d, llintype.ptr_to())), - expr::Ignore => expr::Ignore + let nonpointer_nonaggregate = |llkind: TypeKind| -> bool { + use llvm::TypeKind::*; + match llkind { + Half | Float | Double | X86_FP80 | FP128 | + PPC_FP128 | Integer | Vector | X86_MMX => true, + _ => false + } + }; + + // An approximation to which types can be directly cast via + // LLVM's bitcast. This doesn't cover pointer -> pointer casts, + // but does, importantly, cover SIMD types. + let in_kind = llintype.kind(); + let ret_kind = llret_ty.kind(); + let bitcast_compatible = + (nonpointer_nonaggregate(in_kind) && nonpointer_nonaggregate(ret_kind)) || { + in_kind == TypeKind::Pointer && ret_kind == TypeKind::Pointer + }; + + let dest = if bitcast_compatible { + // if we're here, the type is scalar-like (a primitive, a + // SIMD type or a pointer), and so can be handled as a + // by-value ValueRef and can also be directly bitcast to the + // target type. Doing this special case makes conversions + // like `u32x4` -> `u64x2` much nicer for LLVM and so more + // efficient (these are done efficiently implicitly in C + // with the `__m128i` type and so this means Rust doesn't + // lose out there). 
+ let expr = &*arg_exprs[0]; + let datum = unpack_datum!(bcx, expr::trans(bcx, expr)); + let datum = unpack_datum!(bcx, datum.to_rvalue_datum(bcx, "transmute_temp")); + let val = if datum.kind.is_by_ref() { + load_ty(bcx, datum.val, datum.ty) + } else { + datum.val + }; + + let cast_val = BitCast(bcx, val, llret_ty); + + match dest { + expr::SaveIn(d) => { + // this often occurs in a sequence like `Store(val, + // d); val2 = Load(d)`, so disappears easily. + Store(bcx, cast_val, d); + } + expr::Ignore => {} + } + dest + } else { + // The types are too complicated to do with a by-value + // bitcast, so pointer cast instead. We need to cast the + // dest so the types work out. + let dest = match dest { + expr::SaveIn(d) => expr::SaveIn(PointerCast(bcx, d, llintype.ptr_to())), + expr::Ignore => expr::Ignore + }; + bcx = expr::trans_into(bcx, &*arg_exprs[0], dest); + dest }; - bcx = expr::trans_into(bcx, &*arg_exprs[0], dest); fcx.pop_custom_cleanup_scope(cleanup_scope); |
