diff options
| author | bors <bors@rust-lang.org> | 2021-03-22 01:16:29 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-03-22 01:16:29 +0000 |
| commit | 35385770ae1ea86a911cc44ac43f856831e44b26 (patch) | |
| tree | 509eb08317fe567c9294f2162699bb310266f52c | |
| parent | 97663b6690689379aa0493deb494dfe14627c46b (diff) | |
| parent | 2d8be457434103bfeeea59f1882a15760f2a5c70 (diff) | |
| download | rust-35385770ae1ea86a911cc44ac43f856831e44b26.tar.gz rust-35385770ae1ea86a911cc44ac43f856831e44b26.zip | |
Auto merge of #79846 - the8472:inplace-tra, r=m-ou-se
Use TrustedRandomAccess for in-place iterators where possible This can speed up in-place iterators containing simple casts and transmutes from `Copy` types to any type of same size. `!Copy` types can't be optimized since `TrustedRandomAccess` isn't implemented for those iterators. ``` name on.b ns/iter o1.b ns/iter diff ns/iter diff % speedup vec::bench_transmute 20 (40000 MB/s) 12 (66666 MB/s) -8 -40.00% x 1.67 ```
| -rw-r--r-- | library/alloc/benches/vec.rs | 16 |
| -rw-r--r-- | library/alloc/src/vec/source_iter_marker.rs | 69 |
| -rw-r--r-- | src/test/codegen/vec-in-place.rs | 14 |
3 files changed, 83 insertions, 16 deletions
diff --git a/library/alloc/benches/vec.rs b/library/alloc/benches/vec.rs index 73eb353f6e7..7a098219ce4 100644 --- a/library/alloc/benches/vec.rs +++ b/library/alloc/benches/vec.rs @@ -548,6 +548,22 @@ fn bench_in_place_zip_iter_mut(b: &mut Bencher) { black_box(data); } +pub fn vec_cast<T, U>(input: Vec<T>) -> Vec<U> { + input.into_iter().map(|e| unsafe { std::mem::transmute_copy(&e) }).collect() +} + +#[bench] +fn bench_transmute(b: &mut Bencher) { + let mut vec = vec![10u32; 100]; + b.bytes = 800; // 2 casts x 4 bytes x 100 + b.iter(|| { + let v = std::mem::take(&mut vec); + let v = black_box(vec_cast::<u32, i32>(v)); + let v = black_box(vec_cast::<i32, u32>(v)); + vec = v; + }); +} + #[derive(Clone)] struct Droppable(usize); diff --git a/library/alloc/src/vec/source_iter_marker.rs b/library/alloc/src/vec/source_iter_marker.rs index 8c0e95559fa..50882fc1767 100644 --- a/library/alloc/src/vec/source_iter_marker.rs +++ b/library/alloc/src/vec/source_iter_marker.rs @@ -1,4 +1,4 @@ -use core::iter::{InPlaceIterable, SourceIter}; +use core::iter::{InPlaceIterable, SourceIter, TrustedRandomAccess}; use core::mem::{self, ManuallyDrop}; use core::ptr::{self}; @@ -52,16 +52,7 @@ where ) }; - // use try-fold since - // - it vectorizes better for some iterator adapters - // - unlike most internal iteration methods, it only takes a &mut self - // - it lets us thread the write pointer through its innards and get it back in the end - let sink = InPlaceDrop { inner: dst_buf, dst: dst_buf }; - let sink = iterator - .try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(dst_end)) - .unwrap(); - // iteration succeeded, don't drop head - let dst = ManuallyDrop::new(sink).dst; + let len = SpecInPlaceCollect::collect_in_place(&mut iterator, dst_buf, dst_end); let src = unsafe { iterator.as_inner().as_into_iter() }; // check if SourceIter contract was upheld @@ -72,7 +63,7 @@ where // then the source pointer will stay in its initial position and we can't use it as reference if 
src.ptr != src_ptr { debug_assert!( - dst as *const _ <= src.ptr, + unsafe { dst_buf.add(len) as *const _ } <= src.ptr, "InPlaceIterable contract violation, write pointer advanced beyond read pointer" ); } @@ -82,10 +73,7 @@ where // but prevent drop of the allocation itself once IntoIter goes out of scope src.forget_allocation(); - let vec = unsafe { - let len = dst.offset_from(dst_buf) as usize; - Vec::from_raw_parts(dst_buf, len, cap) - }; + let vec = unsafe { Vec::from_raw_parts(dst_buf, len, cap) }; vec } @@ -106,3 +94,52 @@ fn write_in_place_with_drop<T>( Ok(sink) } } + +/// Helper trait to hold specialized implementations of the in-place iterate-collect loop +trait SpecInPlaceCollect<T, I>: Iterator<Item = T> { + /// Collects an iterator (`self`) into the destination buffer (`dst`) and returns the number of items + /// collected. `end` is the last writable element of the allocation and used for bounds checks. + fn collect_in_place(&mut self, dst: *mut T, end: *const T) -> usize; +} + +impl<T, I> SpecInPlaceCollect<T, I> for I +where + I: Iterator<Item = T>, +{ + #[inline] + default fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize { + // use try-fold since + // - it vectorizes better for some iterator adapters + // - unlike most internal iteration methods, it only takes a &mut self + // - it lets us thread the write pointer through its innards and get it back in the end + let sink = InPlaceDrop { inner: dst_buf, dst: dst_buf }; + let sink = + self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).unwrap(); + // iteration succeeded, don't drop head + unsafe { ManuallyDrop::new(sink).dst.offset_from(dst_buf) as usize } + } +} + +impl<T, I> SpecInPlaceCollect<T, I> for I +where + I: Iterator<Item = T> + TrustedRandomAccess, +{ + #[inline] + fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize { + let len = self.size(); + let mut drop_guard = InPlaceDrop { inner: dst_buf, dst: dst_buf }; + for i in 
0..len { + // Safety: InplaceIterable contract guarantees that for every element we read + // one slot in the underlying storage will have been freed up and we can immediately + // write back the result. + unsafe { + let dst = dst_buf.offset(i as isize); + debug_assert!(dst as *const _ <= end, "InPlaceIterable contract violation"); + ptr::write(dst, self.__iterator_get_unchecked(i)); + drop_guard.dst = dst.add(1); + } + } + mem::forget(drop_guard); + len + } +} diff --git a/src/test/codegen/vec-in-place.rs b/src/test/codegen/vec-in-place.rs new file mode 100644 index 00000000000..72ed7492be9 --- /dev/null +++ b/src/test/codegen/vec-in-place.rs @@ -0,0 +1,14 @@ +// ignore-debug: the debug assertions get in the way +// compile-flags: -O +// min-llvm-version: 11.0 +#![crate_type = "lib"] + +// Ensure that trivial casts of vec elements are O(1) + +// CHECK-LABEL: @vec_iterator_cast +#[no_mangle] +pub fn vec_iterator_cast(vec: Vec<isize>) -> Vec<usize> { + // CHECK-NOT: loop + // CHECK-NOT: call + vec.into_iter().map(|e| e as usize).collect() +} |
