about summary refs log tree commit diff
path: root/src/rustllvm/RustWrapper.cpp
diff options
context:
space:
mode:
author: bors <bors@rust-lang.org> 2014-12-11 00:11:23 +0000
committer: bors <bors@rust-lang.org> 2014-12-11 00:11:23 +0000
commit: 872ba2ccd3a428ad3b3a5f621a12aaa5e8043366 (patch)
tree: ccd700156e01a314f5b5efd337bb35d1c38ab086 /src/rustllvm/RustWrapper.cpp
parent: c38e73fef53e8520e5170c40713e32ab965a8abe (diff)
parent: 1a620661b7ef7bbee10e56895e6ee081504b7378 (diff)
download: rust-872ba2ccd3a428ad3b3a5f621a12aaa5e8043366.tar.gz
download: rust-872ba2ccd3a428ad3b3a5f621a12aaa5e8043366.zip
auto merge of #19294 : huonw/rust/transmute-inplace, r=nikomatsakis
This detects (a subset of) the cases when `transmute::<T, U>(x)` can be
lowered to a direct `bitcast T x to U` in LLVM. This assists with
efficiently handling a SIMD vector as multiple different types,
e.g. swapping bytes/words/double words around inside some larger vector
type.

C compilers like GCC and Clang handle integer vector types as `__m128i`
for all widths, and implicitly insert bitcasts as required. This patch
allows Rust to express this, even if it takes a bit of `unsafe`, whereas
previously it was impossible to do at all without inline assembly.

Example:

    pub fn reverse_u32s(u: u64x2) -> u64x2 {
        unsafe {
            let tmp = mem::transmute::<_, u32x4>(u);
            let swapped = u32x4(tmp.3, tmp.2, tmp.1, tmp.0);
            mem::transmute::<_, u64x2>(swapped)
        }
    }

Compiling with `--opt-level=3` gives:

Before:

    define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
    entry-block:
      %1 = bitcast <2 x i64> %0 to i128
      %u.0.extract.trunc = trunc i128 %1 to i32
      %u.4.extract.shift = lshr i128 %1, 32
      %u.4.extract.trunc = trunc i128 %u.4.extract.shift to i32
      %u.8.extract.shift = lshr i128 %1, 64
      %u.8.extract.trunc = trunc i128 %u.8.extract.shift to i32
      %u.12.extract.shift = lshr i128 %1, 96
      %u.12.extract.trunc = trunc i128 %u.12.extract.shift to i32
      %2 = insertelement <4 x i32> undef, i32 %u.12.extract.trunc, i64 0
      %3 = insertelement <4 x i32> %2, i32 %u.8.extract.trunc, i64 1
      %4 = insertelement <4 x i32> %3, i32 %u.4.extract.trunc, i64 2
      %5 = insertelement <4 x i32> %4, i32 %u.0.extract.trunc, i64 3
      %6 = bitcast <4 x i32> %5 to <2 x i64>
      ret <2 x i64> %6
    }

    _ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
    	.cfi_startproc
    	movd	%xmm0, %rax
    	punpckhqdq	%xmm0, %xmm0
    	movd	%xmm0, %rcx
    	movq	%rcx, %rdx
    	shrq	$32, %rdx
    	movq	%rax, %rsi
    	shrq	$32, %rsi
    	movd	%eax, %xmm0
    	movd	%ecx, %xmm1
    	punpckldq	%xmm0, %xmm1
    	movd	%esi, %xmm2
    	movd	%edx, %xmm0
    	punpckldq	%xmm2, %xmm0
    	punpckldq	%xmm1, %xmm0
    	retq

After:

    define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
    entry-block:
      %1 = bitcast <2 x i64> %0 to <4 x i32>
      %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
      %3 = bitcast <4 x i32> %2 to <2 x i64>
      ret <2 x i64> %3
    }

    _ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
    	.cfi_startproc
    	pshufd	$27, %xmm0, %xmm0
    	retq
Diffstat (limited to 'src/rustllvm/RustWrapper.cpp')
0 files changed, 0 insertions, 0 deletions