diff options
| author | bors <bors@rust-lang.org> | 2022-07-27 01:12:30 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2022-07-27 01:12:30 +0000 |
| commit | b573e10d21b69ebfadf41aa9c2f0a27919fe4480 (patch) | |
| tree | e4ec1b41755a3e7e6f4e299ba14473e08a578667 | |
| parent | 4d6d601c8a83284d6b23c253a3e2a060fd197316 (diff) | |
| parent | 4ba7cac359b0180add75d78929ebae4f90813fa1 (diff) | |
| download | rust-b573e10d21b69ebfadf41aa9c2f0a27919fe4480.tar.gz rust-b573e10d21b69ebfadf41aa9c2f0a27919fe4480.zip | |
Auto merge of #98553 - the8472:next_chunk_opt, r=Mark-Simulacrum
Optimized vec::IntoIter::next_chunk impl ``` x86_64v1, default test vec::bench_next_chunk ... bench: 696 ns/iter (+/- 22) x86_64v1, pr test vec::bench_next_chunk ... bench: 309 ns/iter (+/- 4) znver2, default test vec::bench_next_chunk ... bench: 17,272 ns/iter (+/- 117) znver2, pr test vec::bench_next_chunk ... bench: 211 ns/iter (+/- 3) ``` On znver2 the default impl seems to be slow due to different inlining decisions. It goes through `core::array::iter_next_chunk` which has a deep call tree.
| -rw-r--r-- | library/alloc/benches/lib.rs | 1 | ||||
| -rw-r--r-- | library/alloc/benches/vec.rs | 20 | ||||
| -rw-r--r-- | library/alloc/src/lib.rs | 4 | ||||
| -rw-r--r-- | library/alloc/src/vec/into_iter.rs | 41 | ||||
| -rw-r--r-- | library/alloc/tests/lib.rs | 1 | ||||
| -rw-r--r-- | library/alloc/tests/vec.rs | 10 |
6 files changed, 75 insertions, 2 deletions
diff --git a/library/alloc/benches/lib.rs b/library/alloc/benches/lib.rs index 7dc0f7cebd5..72ac897d4f1 100644 --- a/library/alloc/benches/lib.rs +++ b/library/alloc/benches/lib.rs @@ -2,6 +2,7 @@ // See https://github.com/rust-lang/rust/issues/73535#event-3477699747 #![cfg(not(target_os = "android"))] #![feature(btree_drain_filter)] +#![feature(iter_next_chunk)] #![feature(map_first_last)] #![feature(repr_simd)] #![feature(slice_partition_dedup)] diff --git a/library/alloc/benches/vec.rs b/library/alloc/benches/vec.rs index efc47327e8a..663f6b9dd1c 100644 --- a/library/alloc/benches/vec.rs +++ b/library/alloc/benches/vec.rs @@ -762,3 +762,23 @@ fn bench_retain_whole_100000(b: &mut Bencher) { let mut v = black_box(vec![826u32; 100000]); b.iter(|| v.retain(|x| *x == 826u32)); } + +#[bench] +fn bench_next_chunk(b: &mut Bencher) { + let v = vec![13u8; 2048]; + + b.iter(|| { + const CHUNK: usize = 8; + + let mut sum = [0u32; CHUNK]; + let mut iter = black_box(v.clone()).into_iter(); + + while let Ok(chunk) = iter.next_chunk::<CHUNK>() { + for i in 0..CHUNK { + sum[i] += chunk[i] as u32; + } + } + + sum + }) +} diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index 315469387e5..8b6f4054851 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -89,6 +89,7 @@ #![feature(alloc_layout_extra)] #![feature(allocator_api)] #![feature(array_chunks)] +#![feature(array_into_iter_constructors)] #![feature(array_methods)] #![feature(array_windows)] #![feature(assert_matches)] @@ -117,8 +118,11 @@ #![feature(hasher_prefixfree_extras)] #![feature(inplace_iteration)] #![feature(iter_advance_by)] +#![feature(iter_next_chunk)] #![feature(layout_for_ptr)] +#![feature(maybe_uninit_array_assume_init)] #![feature(maybe_uninit_slice)] +#![feature(maybe_uninit_uninit_array)] #![cfg_attr(test, feature(new_uninit))] #![feature(nonnull_slice_from_raw_parts)] #![feature(pattern)] diff --git a/library/alloc/src/vec/into_iter.rs b/library/alloc/src/vec/into_iter.rs index 28979457b7f..1b483e3fc77 100644 --- a/library/alloc/src/vec/into_iter.rs +++ b/library/alloc/src/vec/into_iter.rs @@ -2,13 +2,14 @@ use super::AsVecIntoIter; use crate::alloc::{Allocator, Global}; use crate::raw_vec::RawVec; +use core::array; use core::fmt; use core::intrinsics::arith_offset; use core::iter::{ FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccessNoCoerce, }; use core::marker::PhantomData; -use core::mem::{self, ManuallyDrop}; +use core::mem::{self, ManuallyDrop, MaybeUninit}; #[cfg(not(no_global_oom_handling))] use core::ops::Deref; use core::ptr::{self, NonNull}; @@ -124,7 +125,6 @@ impl<T, A: Allocator> IntoIter<T, A> { } /// Forgets to Drop the remaining elements while still allowing the backing allocation to be freed. - #[cfg(not(no_global_oom_handling))] pub(crate) fn forget_remaining_elements(&mut self) { self.ptr = self.end; } @@ -204,6 +204,43 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> { self.len() } + #[inline] + fn next_chunk<const N: usize>(&mut self) -> Result<[T; N], core::array::IntoIter<T, N>> { + let mut raw_ary = MaybeUninit::uninit_array(); + + let len = self.len(); + + if mem::size_of::<T>() == 0 { + if len < N { + self.forget_remaining_elements(); + // Safety: ZSTs can be conjured ex nihilo, only the amount has to be correct + return Err(unsafe { array::IntoIter::new_unchecked(raw_ary, 0..len) }); + } + + self.ptr = unsafe { arith_offset(self.ptr as *const i8, N as isize) as *mut T }; + // Safety: ditto + return Ok(unsafe { MaybeUninit::array_assume_init(raw_ary) }); + } + + if len < N { + // Safety: `len` indicates that this many elements are available and we just checked that + // it fits into the array. + unsafe { + ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, len); + self.forget_remaining_elements(); + return Err(array::IntoIter::new_unchecked(raw_ary, 0..len)); + } + } + + // Safety: `len` is larger than the array size. Copy a fixed amount here to fully initialize + // the array. + return unsafe { + ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, N); + self.ptr = self.ptr.add(N); + Ok(MaybeUninit::array_assume_init(raw_ary)) + }; + } + unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item where Self: TrustedRandomAccessNoCoerce, diff --git a/library/alloc/tests/lib.rs b/library/alloc/tests/lib.rs index c29e7b9c81e..d83cd29ddba 100644 --- a/library/alloc/tests/lib.rs +++ b/library/alloc/tests/lib.rs @@ -27,6 +27,7 @@ #![feature(binary_heap_as_slice)] #![feature(inplace_iteration)] #![feature(iter_advance_by)] +#![feature(iter_next_chunk)] #![feature(round_char_boundary)] #![feature(slice_group_by)] #![feature(slice_partition_dedup)] diff --git a/library/alloc/tests/vec.rs b/library/alloc/tests/vec.rs index 699567be5a0..d94da8f5f5a 100644 --- a/library/alloc/tests/vec.rs +++ b/library/alloc/tests/vec.rs @@ -1,4 +1,5 @@ use core::alloc::{Allocator, Layout}; +use core::iter::IntoIterator; use core::ptr::NonNull; use std::alloc::System; use std::assert_matches::assert_matches; @@ -931,6 +932,15 @@ fn test_into_iter_count() { } #[test] +fn test_into_iter_next_chunk() { + let mut iter = b"lorem".to_vec().into_iter(); + + assert_eq!(iter.next_chunk().unwrap(), [b'l', b'o']); // N is inferred as 2 + assert_eq!(iter.next_chunk().unwrap(), [b'r', b'e', b'm']); // N is inferred as 3 + assert_eq!(iter.next_chunk::<4>().unwrap_err().as_slice(), &[]); // N is explicitly 4 +} + +#[test] fn test_into_iter_clone() { fn iter_equal<I: Iterator<Item = i32>>(it: I, slice: &[i32]) { let v: Vec<i32> = it.collect(); |
