diff options
| author | Scott McMurray <scottmcm@users.noreply.github.com> | 2017-05-05 18:54:47 -0700 |
|---|---|---|
| committer | Scott McMurray <scottmcm@users.noreply.github.com> | 2017-05-05 18:54:47 -0700 |
| commit | 1f891d11f5ff64e1f2e9cba79f1069f7a8d13c7f (patch) | |
| tree | 11e387f90f87cab447cfbb29ac9d15cbb746c95b /src/libcore | |
| parent | e8fad325fe8630c0a6561d3e0c9c5fc51423aac0 (diff) | |
| download | rust-1f891d11f5ff64e1f2e9cba79f1069f7a8d13c7f.tar.gz rust-1f891d11f5ff64e1f2e9cba79f1069f7a8d13c7f.zip | |
Improve implementation approach comments in [T]::reverse()
Diffstat (limited to 'src/libcore')
| -rw-r--r-- | src/libcore/slice/mod.rs | 19 |
1 file changed, 15 insertions, 4 deletions
diff --git a/src/libcore/slice/mod.rs b/src/libcore/slice/mod.rs index bf637af0639..e15eb8f2444 100644 --- a/src/libcore/slice/mod.rs +++ b/src/libcore/slice/mod.rs @@ -540,12 +540,24 @@ impl<T> SliceExt for [T] { let mut i: usize = 0; let ln = self.len(); + // For very small types, all the individual reads in the normal + // path perform poorly. We can do better, given efficient unaligned + // load/store, by loading a larger chunk and reversing a register. + + // Ideally LLVM would do this for us, as it knows better than we do + // whether unaligned reads are efficient (since that changes between + // different ARM versions, for example) and what the best chunk size + // would be. Unfortunately, as of LLVM 4.0 (2017-05) it only unrolls + // the loop, so we need to do this ourselves. (Hypothesis: reverse + // is troublesome because the sides can be aligned differently -- + // will be, when the length is odd -- so there's no way of emitting + // pre- and postludes to use fully-aligned SIMD in the middle.) + let fast_unaligned = cfg!(any(target_arch = "x86", target_arch = "x86_64")); if fast_unaligned && mem::size_of::<T>() == 1 { - // Single-byte read & write are comparatively slow. Instead, - // work in usize chunks and get bswap to do the hard work. + // Use the llvm.bswap intrinsic to reverse u8s in a usize let chunk = mem::size_of::<usize>(); while i + chunk - 1 < ln / 2 { unsafe { @@ -561,8 +573,7 @@ impl<T> SliceExt for [T] { } if fast_unaligned && mem::size_of::<T>() == 2 { - // Not quite as good as the above, but still helpful. - // Same general idea, read bigger and do the swap in a register. + // Use rotate-by-16 to reverse u16s in a u32 let chunk = mem::size_of::<u32>() / 2; while i + chunk - 1 < ln / 2 { unsafe { |
