Auto merge of #110353 - the8472:in-place-flatten-chunks, r=cuviper

Expand in-place iteration specialization to Flatten, FlatMap and ArrayChunks This enables the following cases to collect in-place: ```rust let v = vec![[0u8; 4]; 1024] let v: Vec<_> = v.into_iter().flatten().collect(); let v: Vec<Option<NonZeroUsize>> = vec![NonZeroUsize::new(0); 1024]; let v: Vec<_> = v.into_iter().flatten().collect(); let v = vec![u8; 4096]; let v: Vec<_> = v.into_iter().array_chunks::<4>().collect(); ``` Especially the nicheful-option-flattening should be useful in real code.
author: bors <bors@rust-lang.org> 2023-11-28 12:22:16 +0000
committer: bors <bors@rust-lang.org> 2023-11-28 12:22:16 +0000
commit: df0295f07175acc7325ce3ca4152eb05752af1f2 (patch)
tree: 664b294535398b82283c0edcf032b7f46c11b950 /library/alloc/src/vec
parent: 46a24ed2f4b4bdfccca36fb20b1574a6164893d8 (diff)
parent: 072b51cbb5b4ee39ae433b5f92a5952054e6caf3 (diff)
download: rust-df0295f07175acc7325ce3ca4152eb05752af1f2.tar.gz
rust-df0295f07175acc7325ce3ca4152eb05752af1f2.zip
2 files changed, 118 insertions, 27 deletions
diff --git a/library/alloc/src/vec/in_place_collect.rs b/library/alloc/src/vec/in_place_collect.rs
index 5ecd0479971..e4f96fd7640 100644
--- a/library/alloc/src/vec/in_place_collect.rs
+++ b/library/alloc/src/vec/in_place_collect.rs
@@ -6,11 +6,11 @@
 //! The specialization in this module applies to iterators in the shape of
 //! `source.adapter().adapter().adapter().collect::<Vec<U>>()`
 //! where `source` is an owning iterator obtained from [`Vec<T>`], [`Box<[T]>`][box] (by conversion to `Vec`)
-//! or [`BinaryHeap<T>`], the adapters each consume one or more items per step
-//! (represented by [`InPlaceIterable`]), provide transitive access to `source` (via [`SourceIter`])
-//! and thus the underlying allocation. And finally the layouts of `T` and `U` must
-//! have the same size and alignment, this is currently ensured via const eval instead of trait bounds
-//! in the specialized [`SpecFromIter`] implementation.
+//! or [`BinaryHeap<T>`], the adapters guarantee to consume enough items per step to make room
+//! for the results (represented by [`InPlaceIterable`]), provide transitive access to `source`
+//! (via [`SourceIter`]) and thus the underlying allocation.
+//! And finally there are alignment and size constriants to consider, this is currently ensured via
+//! const eval instead of trait bounds in the specialized [`SpecFromIter`] implementation.
 //!
 //! [`BinaryHeap<T>`]: crate::collections::BinaryHeap
 //! [box]: crate::boxed::Box
@@ -35,11 +35,28 @@
 //! the step of reading a value and getting a reference to write to. Instead raw pointers must be
 //! used on the reader and writer side.
 //!
-//! That writes never clobber a yet-to-be-read item is ensured by the [`InPlaceIterable`] requirements.
+//! That writes never clobber a yet-to-be-read items is ensured by the [`InPlaceIterable`] requirements.
 //!
 //! # Layout constraints
 //!
-//! [`Allocator`] requires that `allocate()` and `deallocate()` have matching alignment and size.
+//! When recycling an allocation between different types we must uphold the [`Allocator`] contract
+//! which means that the input and output Layouts have to "fit".
+//!
+//! To complicate things further `InPlaceIterable` supports splitting or merging items into smaller/
+//! larger ones to enable (de)aggregation of arrays.
+//!
+//! Ultimately each step of the iterator must free up enough *bytes* in the source to make room
+//! for the next output item.
+//! If `T` and `U` have the same size no fixup is needed.
+//! If `T`'s size is a multiple of `U`'s we can compensate by multiplying the capacity accordingly.
+//! Otherwise the input capacity (and thus layout) in bytes may not be representable by the output
+//! `Vec<U>`. In that case `alloc.shrink()` is used to update the allocation's layout.
+//!
+//! Alignments of `T` must be the same or larger than `U`. Since alignments are always a power
+//! of two _larger_ implies _is a multiple of_.
+//!
+//! See `in_place_collectible()` for the current conditions.
+//!
 //! Additionally this specialization doesn't make sense for ZSTs as there is no reallocation to
 //! avoid and it would make pointer arithmetic more difficult.
 //!
@@ -137,44 +154,73 @@
 //! }
 //! vec.truncate(write_idx);
 //! ```
+use crate::alloc::{handle_alloc_error, Global};
+use core::alloc::Allocator;
+use core::alloc::Layout;
 use core::iter::{InPlaceIterable, SourceIter, TrustedRandomAccessNoCoerce};
 use core::mem::{self, ManuallyDrop, SizedTypeProperties};
-use core::ptr::{self};
+use core::num::NonZeroUsize;
+use core::ptr::{self, NonNull};
 
 use super::{InPlaceDrop, InPlaceDstBufDrop, SpecFromIter, SpecFromIterNested, Vec};
 
-/// Specialization marker for collecting an iterator pipeline into a Vec while reusing the
-/// source allocation, i.e. executing the pipeline in place.
-#[rustc_unsafe_specialization_marker]
-pub(super) trait InPlaceIterableMarker {}
+const fn in_place_collectible<DEST, SRC>(
+    step_merge: Option<NonZeroUsize>,
+    step_expand: Option<NonZeroUsize>,
+) -> bool {
+    if DEST::IS_ZST || mem::align_of::<SRC>() < mem::align_of::<DEST>() {
+        return false;
+    }
+
+    match (step_merge, step_expand) {
+        (Some(step_merge), Some(step_expand)) => {
+            // At least N merged source items -> at most M expanded destination items
+            // e.g.
+            // - 1 x [u8; 4] -> 4x u8, via flatten
+            // - 4 x u8 -> 1x [u8; 4], via array_chunks
+            mem::size_of::<SRC>() * step_merge.get() >= mem::size_of::<DEST>() * step_expand.get()
+        }
+        // Fall back to other from_iter impls if an overflow occurred in the step merge/expansion
+        // tracking.
+        _ => false,
+    }
+}
 
-impl<T> InPlaceIterableMarker for T where T: InPlaceIterable {}
+/// This provides a shorthand for the source type since local type aliases aren't a thing.
+#[rustc_specialization_trait]
+trait InPlaceCollect: SourceIter<Source: AsVecIntoIter> + InPlaceIterable {
+    type Src;
+}
+
+impl<T> InPlaceCollect for T
+where
+    T: SourceIter<Source: AsVecIntoIter> + InPlaceIterable,
+{
+    type Src = <<T as SourceIter>::Source as AsVecIntoIter>::Item;
+}
 
 impl<T, I> SpecFromIter<T, I> for Vec<T>
 where
-    I: Iterator<Item = T> + SourceIter<Source: AsVecIntoIter> + InPlaceIterableMarker,
+    I: Iterator<Item = T> + InPlaceCollect,
+    <I as SourceIter>::Source: AsVecIntoIter,
 {
     default fn from_iter(mut iterator: I) -> Self {
         // See "Layout constraints" section in the module documentation. We rely on const
         // optimization here since these conditions currently cannot be expressed as trait bounds
-        if T::IS_ZST
-            || mem::size_of::<T>()
-                != mem::size_of::<<<I as SourceIter>::Source as AsVecIntoIter>::Item>()
-            || mem::align_of::<T>()
-                != mem::align_of::<<<I as SourceIter>::Source as AsVecIntoIter>::Item>()
-        {
+        if const { !in_place_collectible::<T, I::Src>(I::MERGE_BY, I::EXPAND_BY) } {
             // fallback to more generic implementations
             return SpecFromIterNested::from_iter(iterator);
         }
 
-        let (src_buf, src_ptr, dst_buf, dst_end, cap) = unsafe {
+        let (src_buf, src_ptr, src_cap, mut dst_buf, dst_end, dst_cap) = unsafe {
             let inner = iterator.as_inner().as_into_iter();
             (
                 inner.buf.as_ptr(),
                 inner.ptr,
+                inner.cap,
                 inner.buf.as_ptr() as *mut T,
                 inner.end as *const T,
-                inner.cap,
+                inner.cap * mem::size_of::<I::Src>() / mem::size_of::<T>(),
             )
         };
 
@@ -196,18 +242,55 @@ where
         }
 
         // The ownership of the allocation and the new `T` values is temporarily moved into `dst_guard`.
-        // This is safe because `forget_allocation_drop_remaining` immediately forgets the allocation
+        // This is safe because
+        // * `forget_allocation_drop_remaining` immediately forgets the allocation
         // before any panic can occur in order to avoid any double free, and then proceeds to drop
         // any remaining values at the tail of the source.
+        // * the shrink either panics without invalidating the allocation, aborts or
+        //   succeeds. In the last case we disarm the guard.
         //
         // Note: This access to the source wouldn't be allowed by the TrustedRandomIteratorNoCoerce
         // contract (used by SpecInPlaceCollect below). But see the "O(1) collect" section in the
         // module documentation why this is ok anyway.
-        let dst_guard = InPlaceDstBufDrop { ptr: dst_buf, len, cap };
+        let dst_guard = InPlaceDstBufDrop { ptr: dst_buf, len, cap: dst_cap };
         src.forget_allocation_drop_remaining();
+
+        // Adjust the allocation if the alignment didn't match or the source had a capacity in bytes
+        // that wasn't a multiple of the destination type size.
+        // Since the discrepancy should generally be small this should only result in some
+        // bookkeeping updates and no memmove.
+        if (const {
+            let src_sz = mem::size_of::<I::Src>();
+            src_sz > 0 && mem::size_of::<T>() % src_sz != 0
+        } && src_cap * mem::size_of::<I::Src>() != dst_cap * mem::size_of::<T>())
+            || const { mem::align_of::<T>() != mem::align_of::<I::Src>() }
+        {
+            let alloc = Global;
+            unsafe {
+                // The old allocation exists, therefore it must have a valid layout.
+                let src_align = mem::align_of::<I::Src>();
+                let src_size = mem::size_of::<I::Src>().unchecked_mul(src_cap);
+                let old_layout = Layout::from_size_align_unchecked(src_size, src_align);
+
+                // The must be equal or smaller for in-place iteration to be possible
+                // therefore the new layout must be ≤ the old one and therefore valid.
+                let dst_align = mem::align_of::<T>();
+                let dst_size = mem::size_of::<T>().unchecked_mul(dst_cap);
+                let new_layout = Layout::from_size_align_unchecked(dst_size, dst_align);
+
+                let result = alloc.shrink(
+                    NonNull::new_unchecked(dst_buf as *mut u8),
+                    old_layout,
+                    new_layout,
+                );
+                let Ok(reallocated) = result else { handle_alloc_error(new_layout) };
+                dst_buf = reallocated.as_ptr() as *mut T;
+            }
+        }
+
         mem::forget(dst_guard);
 
-        let vec = unsafe { Vec::from_raw_parts(dst_buf, len, cap) };
+        let vec = unsafe { Vec::from_raw_parts(dst_buf, len, dst_cap) };
 
         vec
     }
diff --git a/library/alloc/src/vec/into_iter.rs b/library/alloc/src/vec/into_iter.rs
index d51f5a548b5..b03e04b7c70 100644
--- a/library/alloc/src/vec/into_iter.rs
+++ b/library/alloc/src/vec/into_iter.rs
@@ -7,7 +7,8 @@ use crate::raw_vec::RawVec;
 use core::array;
 use core::fmt;
 use core::iter::{
-    FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccessNoCoerce,
+    FusedIterator, InPlaceIterable, SourceIter, TrustedFused, TrustedLen,
+    TrustedRandomAccessNoCoerce,
 };
 use core::marker::PhantomData;
 use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
@@ -337,6 +338,10 @@ impl<T, A: Allocator> ExactSizeIterator for IntoIter<T, A> {
 #[stable(feature = "fused", since = "1.26.0")]
 impl<T, A: Allocator> FusedIterator for IntoIter<T, A> {}
 
+#[doc(hidden)]
+#[unstable(issue = "none", feature = "trusted_fused")]
+unsafe impl<T, A: Allocator> TrustedFused for IntoIter<T, A> {}
+
 #[unstable(feature = "trusted_len", issue = "37572")]
 unsafe impl<T, A: Allocator> TrustedLen for IntoIter<T, A> {}
 
@@ -421,7 +426,10 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for IntoIter<T, A> {
 // also refer to the vec::in_place_collect module documentation to get an overview
 #[unstable(issue = "none", feature = "inplace_iteration")]
 #[doc(hidden)]
-unsafe impl<T, A: Allocator> InPlaceIterable for IntoIter<T, A> {}
+unsafe impl<T, A: Allocator> InPlaceIterable for IntoIter<T, A> {
+    const EXPAND_BY: Option<NonZeroUsize> = NonZeroUsize::new(1);
+    const MERGE_BY: Option<NonZeroUsize> = NonZeroUsize::new(1);
+}
 
 #[unstable(issue = "none", feature = "inplace_iteration")]
 #[doc(hidden)]
author	bors <bors@rust-lang.org>	2023-11-28 12:22:16 +0000
committer	bors <bors@rust-lang.org>	2023-11-28 12:22:16 +0000
commit	df0295f07175acc7325ce3ca4152eb05752af1f2 (patch)
tree	664b294535398b82283c0edcf032b7f46c11b950 /library/alloc/src/vec
parent	46a24ed2f4b4bdfccca36fb20b1574a6164893d8 (diff)
parent	072b51cbb5b4ee39ae433b5f92a5952054e6caf3 (diff)
download	rust-df0295f07175acc7325ce3ca4152eb05752af1f2.tar.gz rust-df0295f07175acc7325ce3ca4152eb05752af1f2.zip