diff options
| author | Djzin <djzin@users.noreply.github.com> | 2017-05-07 20:26:19 +0100 |
|---|---|---|
| committer | Djzin <djzin@users.noreply.github.com> | 2017-05-07 20:26:19 +0100 |
| commit | 165f3668d602c4eaa02125cf86fb8d12719cb441 (patch) | |
| tree | 6d050fccd4dbadfa4defe8a01bc35b708c821e34 /src/libcore | |
| parent | c6ca81aa921415a33e1c8f32e0c53a2c5eb6b485 (diff) | |
| download | rust-165f3668d602c4eaa02125cf86fb8d12719cb441.tar.gz rust-165f3668d602c4eaa02125cf86fb8d12719cb441.zip | |
optimize out stack alignment for sizes < 32
Diffstat (limited to 'src/libcore')
| -rw-r--r-- | src/libcore/mem.rs | 17 |
1 file changed, 12 insertions, 5 deletions
diff --git a/src/libcore/mem.rs b/src/libcore/mem.rs index e51976c4845..2dc1f0e04bc 100644 --- a/src/libcore/mem.rs +++ b/src/libcore/mem.rs @@ -453,15 +453,13 @@ pub fn swap<T>(x: &mut T, y: &mut T) { // #[repr(simd)], even if we don't actually use this struct directly. #[repr(simd)] struct Block(u64, u64, u64, u64); - let block_size = size_of::<Block>(); + struct UnalignedBlock(u64, u64, u64, u64); - // Create some uninitialized memory as scratch space - let mut t: Block = uninitialized(); + let block_size = size_of::<Block>(); - // Get raw pointers to the bytes of x, y & t for easier manipulation + // Get raw pointers to the bytes of x & y for easier manipulation let x = x as *mut T as *mut u8; let y = y as *mut T as *mut u8; - let t = &mut t as *mut _ as *mut u8; // Loop through x & y, copying them `Block` at a time // The optimizer should unroll the loop fully for most types @@ -469,6 +467,12 @@ pub fn swap<T>(x: &mut T, y: &mut T) { let len = size_of::<T>() as isize; let mut i = 0; while i + block_size as isize <= len { + // Create some uninitialized memory as scratch space + // Moving the declaration of `t` here avoids aligning the stack when + // this loop is unused + let mut t: Block = uninitialized(); + let t = &mut t as *mut _ as *mut u8; + // Swap a block of bytes of x & y, using t as a temporary buffer // This should be optimized into efficient SIMD operations where available ptr::copy_nonoverlapping(x.offset(i), t, block_size); @@ -478,6 +482,9 @@ pub fn swap<T>(x: &mut T, y: &mut T) { } if i < len { // Swap any remaining bytes + let mut t: UnalignedBlock = uninitialized(); + let t = &mut t as *mut _ as *mut u8; + let rem = (len - i) as usize; ptr::copy_nonoverlapping(x.offset(i), t, rem); ptr::copy_nonoverlapping(y.offset(i), x.offset(i), rem); |
