diff options
Diffstat (limited to 'library/alloc/src')
25 files changed, 1856 insertions, 321 deletions
diff --git a/library/alloc/src/alloc.rs b/library/alloc/src/alloc.rs index c9b98fa4e5a..76630a746dd 100644 --- a/library/alloc/src/alloc.rs +++ b/library/alloc/src/alloc.rs @@ -17,6 +17,7 @@ unsafe extern "Rust" { #[rustc_allocator] #[rustc_nounwind] #[rustc_std_internal_symbol] + #[rustc_allocator_zeroed_variant = "__rust_alloc_zeroed"] fn __rust_alloc(size: usize, align: usize) -> *mut u8; #[rustc_deallocator] #[rustc_nounwind] diff --git a/library/alloc/src/borrow.rs b/library/alloc/src/borrow.rs index 07f51b7614f..cb32896161e 100644 --- a/library/alloc/src/borrow.rs +++ b/library/alloc/src/borrow.rs @@ -17,9 +17,11 @@ use crate::fmt; use crate::string::String; #[stable(feature = "rust1", since = "1.0.0")] -impl<'a, B: ?Sized> Borrow<B> for Cow<'a, B> +#[rustc_const_unstable(feature = "const_convert", issue = "143773")] +impl<'a, B: ?Sized> const Borrow<B> for Cow<'a, B> where B: ToOwned, + B::Owned: [const] Borrow<B>, { fn borrow(&self) -> &B { &**self @@ -326,9 +328,10 @@ impl<B: ?Sized + ToOwned> Cow<'_, B> { } #[stable(feature = "rust1", since = "1.0.0")] -impl<B: ?Sized + ToOwned> Deref for Cow<'_, B> +#[rustc_const_unstable(feature = "const_convert", issue = "143773")] +impl<B: ?Sized + ToOwned> const Deref for Cow<'_, B> where - B::Owned: Borrow<B>, + B::Owned: [const] Borrow<B>, { type Target = B; @@ -439,7 +442,11 @@ where } #[stable(feature = "rust1", since = "1.0.0")] -impl<T: ?Sized + ToOwned> AsRef<T> for Cow<'_, T> { +#[rustc_const_unstable(feature = "const_convert", issue = "143773")] +impl<T: ?Sized + ToOwned> const AsRef<T> for Cow<'_, T> +where + T::Owned: [const] Borrow<T>, +{ fn as_ref(&self) -> &T { self } diff --git a/library/alloc/src/boxed.rs b/library/alloc/src/boxed.rs index 173d0103c11..1c549f7b6ba 100644 --- a/library/alloc/src/boxed.rs +++ b/library/alloc/src/boxed.rs @@ -632,7 +632,7 @@ impl<T> Box<[T]> { /// values[0].write(1); /// values[1].write(2); /// values[2].write(3); - /// let values = unsafe {values.assume_init() }; + /// 
let values = unsafe { values.assume_init() }; /// /// assert_eq!(*values, [1, 2, 3]) /// ``` @@ -1660,7 +1660,7 @@ unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for Box<T, A> { #[cfg(not(no_global_oom_handling))] #[stable(feature = "rust1", since = "1.0.0")] impl<T: Default> Default for Box<T> { - /// Creates a `Box<T>`, with the `Default` value for T. + /// Creates a `Box<T>`, with the `Default` value for `T`. #[inline] fn default() -> Self { let mut x: Box<mem::MaybeUninit<T>> = Box::new_uninit(); @@ -1683,6 +1683,7 @@ impl<T: Default> Default for Box<T> { #[cfg(not(no_global_oom_handling))] #[stable(feature = "rust1", since = "1.0.0")] impl<T> Default for Box<[T]> { + /// Creates an empty `[T]` inside a `Box`. #[inline] fn default() -> Self { let ptr: Unique<[T]> = Unique::<[T; 0]>::dangling(); @@ -1705,6 +1706,19 @@ impl Default for Box<str> { } #[cfg(not(no_global_oom_handling))] +#[stable(feature = "pin_default_impls", since = "CURRENT_RUSTC_VERSION")] +impl<T> Default for Pin<Box<T>> +where + T: ?Sized, + Box<T>: Default, +{ + #[inline] + fn default() -> Self { + Box::into_pin(Box::<T>::default()) + } +} + +#[cfg(not(no_global_oom_handling))] #[stable(feature = "rust1", since = "1.0.0")] impl<T: Clone, A: Allocator + Clone> Clone for Box<T, A> { /// Returns a new box with a `clone()` of this box's contents. 
@@ -2102,11 +2116,6 @@ impl<F: ?Sized + Future + Unpin, A: Allocator> Future for Box<F, A> { #[stable(feature = "box_error", since = "1.8.0")] impl<E: Error> Error for Box<E> { - #[allow(deprecated, deprecated_in_future)] - fn description(&self) -> &str { - Error::description(&**self) - } - #[allow(deprecated)] fn cause(&self) -> Option<&dyn Error> { Error::cause(&**self) diff --git a/library/alloc/src/boxed/convert.rs b/library/alloc/src/boxed/convert.rs index 80626580202..45c46fb5263 100644 --- a/library/alloc/src/boxed/convert.rs +++ b/library/alloc/src/boxed/convert.rs @@ -608,12 +608,7 @@ impl<'a> From<String> for Box<dyn Error + Send + Sync + 'a> { fn from(err: String) -> Box<dyn Error + Send + Sync + 'a> { struct StringError(String); - impl Error for StringError { - #[allow(deprecated)] - fn description(&self) -> &str { - &self.0 - } - } + impl Error for StringError {} impl fmt::Display for StringError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { diff --git a/library/alloc/src/collections/btree/map.rs b/library/alloc/src/collections/btree/map.rs index 17c16e4aaff..98f11e2ea57 100644 --- a/library/alloc/src/collections/btree/map.rs +++ b/library/alloc/src/collections/btree/map.rs @@ -40,30 +40,15 @@ pub(super) const MIN_LEN: usize = node::MIN_LEN_AFTER_SPLIT; /// An ordered map based on a [B-Tree]. /// -/// B-Trees represent a fundamental compromise between cache-efficiency and actually minimizing -/// the amount of work performed in a search. In theory, a binary search tree (BST) is the optimal -/// choice for a sorted map, as a perfectly balanced BST performs the theoretical minimum amount of -/// comparisons necessary to find an element (log<sub>2</sub>n). However, in practice the way this -/// is done is *very* inefficient for modern computer architectures. In particular, every element -/// is stored in its own individually heap-allocated node. 
This means that every single insertion -/// triggers a heap-allocation, and every single comparison should be a cache-miss. Since these -/// are both notably expensive things to do in practice, we are forced to, at the very least, -/// reconsider the BST strategy. +/// Given a key type with a [total order], an ordered map stores its entries in key order. +/// That means that keys must be of a type that implements the [`Ord`] trait, +/// such that two keys can always be compared to determine their [`Ordering`]. +/// Examples of keys with a total order are strings with lexicographical order, +/// and numbers with their natural order. /// -/// A B-Tree instead makes each node contain B-1 to 2B-1 elements in a contiguous array. By doing -/// this, we reduce the number of allocations by a factor of B, and improve cache efficiency in -/// searches. However, this does mean that searches will have to do *more* comparisons on average. -/// The precise number of comparisons depends on the node search strategy used. For optimal cache -/// efficiency, one could search the nodes linearly. For optimal comparisons, one could search -/// the node using binary search. As a compromise, one could also perform a linear search -/// that initially only checks every i<sup>th</sup> element for some choice of i. -/// -/// Currently, our implementation simply performs naive linear search. This provides excellent -/// performance on *small* nodes of elements which are cheap to compare. However in the future we -/// would like to further explore choosing the optimal search strategy based on the choice of B, -/// and possibly other factors. Using linear search, searching for a random element is expected -/// to take B * log(n) comparisons, which is generally worse than a BST. In practice, -/// however, performance is excellent. 
+/// Iterators obtained from functions such as [`BTreeMap::iter`], [`BTreeMap::into_iter`], [`BTreeMap::values`], or +/// [`BTreeMap::keys`] produce their items in key order, and take worst-case logarithmic and +/// amortized constant time per item returned. /// /// It is a logic error for a key to be modified in such a way that the key's ordering relative to /// any other key, as determined by the [`Ord`] trait, changes while it is in the map. This is @@ -72,14 +57,6 @@ pub(super) const MIN_LEN: usize = node::MIN_LEN_AFTER_SPLIT; /// `BTreeMap` that observed the logic error and not result in undefined behavior. This could /// include panics, incorrect results, aborts, memory leaks, and non-termination. /// -/// Iterators obtained from functions such as [`BTreeMap::iter`], [`BTreeMap::into_iter`], [`BTreeMap::values`], or -/// [`BTreeMap::keys`] produce their items in order by key, and take worst-case logarithmic and -/// amortized constant time per item returned. -/// -/// [B-Tree]: https://en.wikipedia.org/wiki/B-tree -/// [`Cell`]: core::cell::Cell -/// [`RefCell`]: core::cell::RefCell -/// /// # Examples /// /// ``` @@ -135,6 +112,8 @@ pub(super) const MIN_LEN: usize = node::MIN_LEN_AFTER_SPLIT; /// ]); /// ``` /// +/// ## `Entry` API +/// /// `BTreeMap` implements an [`Entry API`], which allows for complex /// methods of getting, setting, updating and removing keys and their values: /// @@ -167,6 +146,43 @@ pub(super) const MIN_LEN: usize = node::MIN_LEN_AFTER_SPLIT; /// // modify an entry before an insert with in-place mutation /// player_stats.entry("mana").and_modify(|mana| *mana += 200).or_insert(100); /// ``` +/// +/// # Background +/// +/// A B-tree is (like) a [binary search tree], but adapted to the natural granularity that modern +/// machines like to consume data at. This means that each node contains an entire array of elements, +/// instead of just a single element. 
+/// +/// B-Trees represent a fundamental compromise between cache-efficiency and actually minimizing +/// the amount of work performed in a search. In theory, a binary search tree (BST) is the optimal +/// choice for a sorted map, as a perfectly balanced BST performs the theoretical minimum number of +/// comparisons necessary to find an element (log<sub>2</sub>n). However, in practice the way this +/// is done is *very* inefficient for modern computer architectures. In particular, every element +/// is stored in its own individually heap-allocated node. This means that every single insertion +/// triggers a heap-allocation, and every comparison is a potential cache-miss due to the indirection. +/// Since both heap-allocations and cache-misses are notably expensive in practice, we are forced to, +/// at the very least, reconsider the BST strategy. +/// +/// A B-Tree instead makes each node contain B-1 to 2B-1 elements in a contiguous array. By doing +/// this, we reduce the number of allocations by a factor of B, and improve cache efficiency in +/// searches. However, this does mean that searches will have to do *more* comparisons on average. +/// The precise number of comparisons depends on the node search strategy used. For optimal cache +/// efficiency, one could search the nodes linearly. For optimal comparisons, one could search +/// the node using binary search. As a compromise, one could also perform a linear search +/// that initially only checks every i<sup>th</sup> element for some choice of i. +/// +/// Currently, our implementation simply performs naive linear search. This provides excellent +/// performance on *small* nodes of elements which are cheap to compare. However in the future we +/// would like to further explore choosing the optimal search strategy based on the choice of B, +/// and possibly other factors. Using linear search, searching for a random element is expected +/// to take B * log(n) comparisons, which is generally worse than a BST. 
In practice, +/// however, performance is excellent. +/// +/// [B-Tree]: https://en.wikipedia.org/wiki/B-tree +/// [binary search tree]: https://en.wikipedia.org/wiki/Binary_search_tree +/// [total order]: https://en.wikipedia.org/wiki/Total_order +/// [`Cell`]: core::cell::Cell +/// [`RefCell`]: core::cell::RefCell #[stable(feature = "rust1", since = "1.0.0")] #[cfg_attr(not(test), rustc_diagnostic_item = "BTreeMap")] #[rustc_insignificant_dtor] @@ -382,6 +398,7 @@ impl<'a, K: 'a, V: 'a> Default for Iter<'a, K, V> { /// documentation for more. /// /// [`iter_mut`]: BTreeMap::iter_mut +#[must_use = "iterators are lazy and do nothing unless consumed"] #[stable(feature = "rust1", since = "1.0.0")] pub struct IterMut<'a, K: 'a, V: 'a> { range: LazyLeafRange<marker::ValMut<'a>, K, V>, @@ -391,7 +408,6 @@ pub struct IterMut<'a, K: 'a, V: 'a> { _marker: PhantomData<&'a mut (K, V)>, } -#[must_use = "iterators are lazy and do nothing unless consumed"] #[stable(feature = "collection_debug", since = "1.17.0")] impl<K: fmt::Debug, V: fmt::Debug> fmt::Debug for IterMut<'_, K, V> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -1417,7 +1433,6 @@ impl<K, V, A: Allocator + Clone> BTreeMap<K, V, A> { /// # Examples /// /// ``` - /// #![feature(btree_extract_if)] /// use std::collections::BTreeMap; /// /// // Splitting a map into even and odd keys, reusing the original map: @@ -1434,7 +1449,7 @@ impl<K, V, A: Allocator + Clone> BTreeMap<K, V, A> { /// assert_eq!(low.keys().copied().collect::<Vec<_>>(), [0, 1, 2, 3]); /// assert_eq!(high.keys().copied().collect::<Vec<_>>(), [4, 5, 6, 7]); /// ``` - #[unstable(feature = "btree_extract_if", issue = "70530")] + #[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] pub fn extract_if<F, R>(&mut self, range: R, pred: F) -> ExtractIf<'_, K, V, R, F, A> where K: Ord, @@ -1921,7 +1936,7 @@ impl<K, V> Default for Values<'_, K, V> { } /// An iterator produced by calling `extract_if` on BTreeMap. 
-#[unstable(feature = "btree_extract_if", issue = "70530")] +#[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] #[must_use = "iterators are lazy and do nothing unless consumed"] pub struct ExtractIf< 'a, @@ -1954,7 +1969,7 @@ pub(super) struct ExtractIfInner<'a, K, V, R> { range: R, } -#[unstable(feature = "btree_extract_if", issue = "70530")] +#[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] impl<K, V, R, F, A> fmt::Debug for ExtractIf<'_, K, V, R, F, A> where K: fmt::Debug, @@ -1966,7 +1981,7 @@ where } } -#[unstable(feature = "btree_extract_if", issue = "70530")] +#[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] impl<K, V, R, F, A: Allocator + Clone> Iterator for ExtractIf<'_, K, V, R, F, A> where K: PartialOrd, @@ -2040,7 +2055,7 @@ impl<'a, K, V, R> ExtractIfInner<'a, K, V, R> { } } -#[unstable(feature = "btree_extract_if", issue = "70530")] +#[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] impl<K, V, R, F> FusedIterator for ExtractIf<'_, K, V, R, F> where K: PartialOrd, diff --git a/library/alloc/src/collections/btree/map/entry.rs b/library/alloc/src/collections/btree/map/entry.rs index ea8fa363c38..21486651b0b 100644 --- a/library/alloc/src/collections/btree/map/entry.rs +++ b/library/alloc/src/collections/btree/map/entry.rs @@ -136,10 +136,6 @@ impl<'a, K: Debug + Ord, V: Debug, A: Allocator + Clone> fmt::Display impl<'a, K: core::fmt::Debug + Ord, V: core::fmt::Debug> core::error::Error for crate::collections::btree_map::OccupiedError<'a, K, V> { - #[allow(deprecated)] - fn description(&self) -> &str { - "key already exists" - } } impl<'a, K: Ord, V, A: Allocator + Clone> Entry<'a, K, V, A> { @@ -275,7 +271,6 @@ impl<'a, K: Ord, V, A: Allocator + Clone> Entry<'a, K, V, A> { /// # Examples /// /// ``` - /// #![feature(btree_entry_insert)] /// use std::collections::BTreeMap; /// /// let mut map: BTreeMap<&str, String> = BTreeMap::new(); @@ -284,7 +279,7 @@ impl<'a, 
K: Ord, V, A: Allocator + Clone> Entry<'a, K, V, A> { /// assert_eq!(entry.key(), &"poneyland"); /// ``` #[inline] - #[unstable(feature = "btree_entry_insert", issue = "65225")] + #[stable(feature = "btree_entry_insert", since = "CURRENT_RUSTC_VERSION")] pub fn insert_entry(self, value: V) -> OccupiedEntry<'a, K, V, A> { match self { Occupied(mut entry) => { @@ -383,7 +378,6 @@ impl<'a, K: Ord, V, A: Allocator + Clone> VacantEntry<'a, K, V, A> { /// # Examples /// /// ``` - /// #![feature(btree_entry_insert)] /// use std::collections::BTreeMap; /// use std::collections::btree_map::Entry; /// @@ -395,7 +389,7 @@ impl<'a, K: Ord, V, A: Allocator + Clone> VacantEntry<'a, K, V, A> { /// } /// assert_eq!(map["poneyland"], 37); /// ``` - #[unstable(feature = "btree_entry_insert", issue = "65225")] + #[stable(feature = "btree_entry_insert", since = "CURRENT_RUSTC_VERSION")] pub fn insert_entry(mut self, value: V) -> OccupiedEntry<'a, K, V, A> { let handle = match self.handle { None => { diff --git a/library/alloc/src/collections/btree/node.rs b/library/alloc/src/collections/btree/node.rs index 37f784a322c..b233e1740b7 100644 --- a/library/alloc/src/collections/btree/node.rs +++ b/library/alloc/src/collections/btree/node.rs @@ -67,6 +67,10 @@ struct LeafNode<K, V> { impl<K, V> LeafNode<K, V> { /// Initializes a new `LeafNode` in-place. + /// + /// # Safety + /// + /// The caller must ensure that `this` points to a (possibly uninitialized) `LeafNode` unsafe fn init(this: *mut Self) { // As a general policy, we leave fields uninitialized if they can be, as this should // be both slightly faster and easier to track in Valgrind. @@ -79,9 +83,11 @@ impl<K, V> LeafNode<K, V> { /// Creates a new boxed `LeafNode`. 
fn new<A: Allocator + Clone>(alloc: A) -> Box<Self, A> { + let mut leaf = Box::new_uninit_in(alloc); unsafe { - let mut leaf = Box::new_uninit_in(alloc); + // SAFETY: `leaf` points to a `LeafNode` LeafNode::init(leaf.as_mut_ptr()); + // SAFETY: `leaf` was just initialized leaf.assume_init() } } diff --git a/library/alloc/src/collections/btree/set.rs b/library/alloc/src/collections/btree/set.rs index d50ce02bda7..e6b0a1f6323 100644 --- a/library/alloc/src/collections/btree/set.rs +++ b/library/alloc/src/collections/btree/set.rs @@ -1202,7 +1202,6 @@ impl<T, A: Allocator + Clone> BTreeSet<T, A> { /// # Examples /// /// ``` - /// #![feature(btree_extract_if)] /// use std::collections::BTreeSet; /// /// // Splitting a set into even and odd values, reusing the original set: @@ -1219,7 +1218,7 @@ impl<T, A: Allocator + Clone> BTreeSet<T, A> { /// assert_eq!(low.into_iter().collect::<Vec<_>>(), [0, 1, 2, 3]); /// assert_eq!(high.into_iter().collect::<Vec<_>>(), [4, 5, 6, 7]); /// ``` - #[unstable(feature = "btree_extract_if", issue = "70530")] + #[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] pub fn extract_if<F, R>(&mut self, range: R, pred: F) -> ExtractIf<'_, T, R, F, A> where T: Ord, @@ -1554,7 +1553,7 @@ impl<'a, T, A: Allocator + Clone> IntoIterator for &'a BTreeSet<T, A> { } /// An iterator produced by calling `extract_if` on BTreeSet. 
-#[unstable(feature = "btree_extract_if", issue = "70530")] +#[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] #[must_use = "iterators are lazy and do nothing unless consumed"] pub struct ExtractIf< 'a, @@ -1569,7 +1568,7 @@ pub struct ExtractIf< alloc: A, } -#[unstable(feature = "btree_extract_if", issue = "70530")] +#[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] impl<T, R, F, A> fmt::Debug for ExtractIf<'_, T, R, F, A> where T: fmt::Debug, @@ -1582,7 +1581,7 @@ where } } -#[unstable(feature = "btree_extract_if", issue = "70530")] +#[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] impl<T, R, F, A: Allocator + Clone> Iterator for ExtractIf<'_, T, R, F, A> where T: PartialOrd, @@ -1602,7 +1601,7 @@ where } } -#[unstable(feature = "btree_extract_if", issue = "70530")] +#[stable(feature = "btree_extract_if", since = "CURRENT_RUSTC_VERSION")] impl<T, R, F, A: Allocator + Clone> FusedIterator for ExtractIf<'_, T, R, F, A> where T: PartialOrd, diff --git a/library/alloc/src/collections/linked_list.rs b/library/alloc/src/collections/linked_list.rs index 70c344e49b7..31dfe73fc79 100644 --- a/library/alloc/src/collections/linked_list.rs +++ b/library/alloc/src/collections/linked_list.rs @@ -825,7 +825,7 @@ impl<T, A: Allocator> LinkedList<T, A> { unsafe { self.tail.as_mut().map(|node| &mut node.as_mut().element) } } - /// Adds an element first in the list. + /// Adds an element to the front of the list. /// /// This operation should compute in *O*(1) time. /// @@ -844,11 +844,34 @@ impl<T, A: Allocator> LinkedList<T, A> { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn push_front(&mut self, elt: T) { + let _ = self.push_front_mut(elt); + } + + /// Adds an element to the front of the list, returning a reference to it. + /// + /// This operation should compute in *O*(1) time. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(push_mut)] + /// use std::collections::LinkedList; + /// + /// let mut dl = LinkedList::from([1, 2, 3]); + /// + /// let ptr = dl.push_front_mut(2); + /// *ptr += 4; + /// assert_eq!(dl.front().unwrap(), &6); + /// ``` + #[unstable(feature = "push_mut", issue = "135974")] + #[must_use = "if you don't need a reference to the value, use `LinkedList::push_front` instead"] + pub fn push_front_mut(&mut self, elt: T) -> &mut T { let node = Box::new_in(Node::new(elt), &self.alloc); - let node_ptr = NonNull::from(Box::leak(node)); + let mut node_ptr = NonNull::from(Box::leak(node)); // SAFETY: node_ptr is a unique pointer to a node we boxed with self.alloc and leaked unsafe { self.push_front_node(node_ptr); + &mut node_ptr.as_mut().element } } @@ -876,7 +899,7 @@ impl<T, A: Allocator> LinkedList<T, A> { self.pop_front_node().map(Node::into_element) } - /// Appends an element to the back of a list. + /// Adds an element to the back of the list. /// /// This operation should compute in *O*(1) time. /// @@ -893,11 +916,34 @@ impl<T, A: Allocator> LinkedList<T, A> { #[stable(feature = "rust1", since = "1.0.0")] #[rustc_confusables("push", "append")] pub fn push_back(&mut self, elt: T) { + let _ = self.push_back_mut(elt); + } + + /// Adds an element to the back of the list, returning a reference to it. + /// + /// This operation should compute in *O*(1) time. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(push_mut)] + /// use std::collections::LinkedList; + /// + /// let mut dl = LinkedList::from([1, 2, 3]); + /// + /// let ptr = dl.push_back_mut(2); + /// *ptr += 4; + /// assert_eq!(dl.back().unwrap(), &6); + /// ``` + #[unstable(feature = "push_mut", issue = "135974")] + #[must_use = "if you don't need a reference to the value, use `LinkedList::push_back` instead"] + pub fn push_back_mut(&mut self, elt: T) -> &mut T { let node = Box::new_in(Node::new(elt), &self.alloc); - let node_ptr = NonNull::from(Box::leak(node)); + let mut node_ptr = NonNull::from(Box::leak(node)); // SAFETY: node_ptr is a unique pointer to a node we boxed with self.alloc and leaked unsafe { self.push_back_node(node_ptr); + &mut node_ptr.as_mut().element } } diff --git a/library/alloc/src/collections/mod.rs b/library/alloc/src/collections/mod.rs index fac4d1a65ab..212d7c8465b 100644 --- a/library/alloc/src/collections/mod.rs +++ b/library/alloc/src/collections/mod.rs @@ -128,8 +128,9 @@ pub use realalloc::collections::TryReserveErrorKind; reason = "Uncertain how much info should be exposed", issue = "48043" )] +#[rustc_const_unstable(feature = "const_convert", issue = "143773")] #[cfg(not(test))] -impl From<TryReserveErrorKind> for TryReserveError { +impl const From<TryReserveErrorKind> for TryReserveError { #[inline] fn from(kind: TryReserveErrorKind) -> Self { Self { kind } @@ -137,8 +138,9 @@ impl From<TryReserveErrorKind> for TryReserveError { } #[unstable(feature = "try_reserve_kind", reason = "new API", issue = "48043")] +#[rustc_const_unstable(feature = "const_convert", issue = "143773")] #[cfg(not(test))] -impl From<LayoutError> for TryReserveErrorKind { +impl const From<LayoutError> for TryReserveErrorKind { /// Always evaluates to [`TryReserveErrorKind::CapacityOverflow`]. 
#[inline] fn from(_: LayoutError) -> Self { diff --git a/library/alloc/src/collections/vec_deque/drain.rs b/library/alloc/src/collections/vec_deque/drain.rs index 44fcef4ed7d..321621d18be 100644 --- a/library/alloc/src/collections/vec_deque/drain.rs +++ b/library/alloc/src/collections/vec_deque/drain.rs @@ -192,7 +192,7 @@ impl<T, A: Allocator> Drop for Drain<'_, T, A> { // this branch is never taken. // We use `#[cold]` instead of `#[inline(never)]`, because inlining this // function into the general case (`.drain(n..m)`) is fine. - // See `tests/codegen/vecdeque-drain.rs` for a test. + // See `tests/codegen-llvm/vecdeque-drain.rs` for a test. #[cold] fn join_head_and_tail_wrapping<T, A: Allocator>( source_deque: &mut VecDeque<T, A>, diff --git a/library/alloc/src/collections/vec_deque/mod.rs b/library/alloc/src/collections/vec_deque/mod.rs index 08b1828ff00..d589860524b 100644 --- a/library/alloc/src/collections/vec_deque/mod.rs +++ b/library/alloc/src/collections/vec_deque/mod.rs @@ -182,11 +182,16 @@ impl<T, A: Allocator> VecDeque<T, A> { unsafe { ptr::read(self.ptr().add(off)) } } - /// Writes an element into the buffer, moving it. + /// Writes an element into the buffer, moving it and returning a pointer to it. + /// # Safety + /// + /// May only be called if `off < self.capacity()`. #[inline] - unsafe fn buffer_write(&mut self, off: usize, value: T) { + unsafe fn buffer_write(&mut self, off: usize, value: T) -> &mut T { unsafe { - ptr::write(self.ptr().add(off), value); + let ptr = self.ptr().add(off); + ptr::write(ptr, value); + &mut *ptr } } @@ -1481,8 +1486,8 @@ impl<T, A: Allocator> VecDeque<T, A> { /// /// # Panics /// - /// Panics if the starting point is greater than the end point or if - /// the end point is greater than the length of the deque. + /// Panics if the range has `start_bound > end_bound`, or, if the range is + /// bounded on either end and past the length of the deque. 
/// /// # Examples /// @@ -1517,8 +1522,8 @@ impl<T, A: Allocator> VecDeque<T, A> { /// /// # Panics /// - /// Panics if the starting point is greater than the end point or if - /// the end point is greater than the length of the deque. + /// Panics if the range has `start_bound > end_bound`, or, if the range is + /// bounded on either end and past the length of the deque. /// /// # Examples /// @@ -1563,8 +1568,8 @@ impl<T, A: Allocator> VecDeque<T, A> { /// /// # Panics /// - /// Panics if the starting point is greater than the end point or if - /// the end point is greater than the length of the deque. + /// Panics if the range has `start_bound > end_bound`, or, if the range is + /// bounded on either end and past the length of the deque. /// /// # Leaking /// @@ -1888,16 +1893,34 @@ impl<T, A: Allocator> VecDeque<T, A> { #[stable(feature = "rust1", since = "1.0.0")] #[track_caller] pub fn push_front(&mut self, value: T) { + let _ = self.push_front_mut(value); + } + + /// Prepends an element to the deque, returning a reference to it. + /// + /// # Examples + /// + /// ``` + /// #![feature(push_mut)] + /// use std::collections::VecDeque; + /// + /// let mut d = VecDeque::from([1, 2, 3]); + /// let x = d.push_front_mut(8); + /// *x -= 1; + /// assert_eq!(d.front(), Some(&7)); + /// ``` + #[unstable(feature = "push_mut", issue = "135974")] + #[track_caller] + #[must_use = "if you don't need a reference to the value, use `VecDeque::push_front` instead"] + pub fn push_front_mut(&mut self, value: T) -> &mut T { if self.is_full() { self.grow(); } self.head = self.wrap_sub(self.head, 1); self.len += 1; - - unsafe { - self.buffer_write(self.head, value); - } + // SAFETY: We know that self.head is within range of the deque. + unsafe { self.buffer_write(self.head, value) } } /// Appends an element to the back of the deque. 
@@ -1916,12 +1939,33 @@ impl<T, A: Allocator> VecDeque<T, A> { #[rustc_confusables("push", "put", "append")] #[track_caller] pub fn push_back(&mut self, value: T) { + let _ = self.push_back_mut(value); + } + + /// Appends an element to the back of the deque, returning a reference to it. + /// + /// # Examples + /// + /// ``` + /// #![feature(push_mut)] + /// use std::collections::VecDeque; + /// + /// let mut d = VecDeque::from([1, 2, 3]); + /// let x = d.push_back_mut(9); + /// *x += 1; + /// assert_eq!(d.back(), Some(&10)); + /// ``` + #[unstable(feature = "push_mut", issue = "135974")] + #[track_caller] + #[must_use = "if you don't need a reference to the value, use `VecDeque::push_back` instead"] + pub fn push_back_mut(&mut self, value: T) -> &mut T { if self.is_full() { self.grow(); } - unsafe { self.buffer_write(self.to_physical_idx(self.len), value) } + let len = self.len; self.len += 1; + unsafe { self.buffer_write(self.to_physical_idx(len), value) } } #[inline] @@ -2007,7 +2051,7 @@ impl<T, A: Allocator> VecDeque<T, A> { /// /// # Panics /// - /// Panics if `index` is strictly greater than deque's length + /// Panics if `index` is strictly greater than the deque's length. /// /// # Examples /// @@ -2029,7 +2073,37 @@ impl<T, A: Allocator> VecDeque<T, A> { #[stable(feature = "deque_extras_15", since = "1.5.0")] #[track_caller] pub fn insert(&mut self, index: usize, value: T) { + let _ = self.insert_mut(index, value); + } + + /// Inserts an element at `index` within the deque, shifting all elements + /// with indices greater than or equal to `index` towards the back, and + /// returning a reference to it. + /// + /// Element at index 0 is the front of the queue. + /// + /// # Panics + /// + /// Panics if `index` is strictly greater than the deque's length. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(push_mut)] + /// use std::collections::VecDeque; + /// + /// let mut vec_deque = VecDeque::from([1, 2, 3]); + /// + /// let x = vec_deque.insert_mut(1, 5); + /// *x += 7; + /// assert_eq!(vec_deque, &[1, 12, 2, 3]); + /// ``` + #[unstable(feature = "push_mut", issue = "135974")] + #[track_caller] + #[must_use = "if you don't need a reference to the value, use `VecDeque::insert` instead"] + pub fn insert_mut(&mut self, index: usize, value: T) -> &mut T { assert!(index <= self.len(), "index out of bounds"); + if self.is_full() { self.grow(); } @@ -2042,16 +2116,16 @@ impl<T, A: Allocator> VecDeque<T, A> { unsafe { // see `remove()` for explanation why this wrap_copy() call is safe. self.wrap_copy(self.to_physical_idx(index), self.to_physical_idx(index + 1), k); - self.buffer_write(self.to_physical_idx(index), value); self.len += 1; + self.buffer_write(self.to_physical_idx(index), value) } } else { let old_head = self.head; self.head = self.wrap_sub(self.head, 1); unsafe { self.wrap_copy(old_head, self.head, index); - self.buffer_write(self.to_physical_idx(index), value); self.len += 1; + self.buffer_write(self.to_physical_idx(index), value) } } } diff --git a/library/alloc/src/ffi/c_str.rs b/library/alloc/src/ffi/c_str.rs index 93bdad75380..b0c8c4b1ca4 100644 --- a/library/alloc/src/ffi/c_str.rs +++ b/library/alloc/src/ffi/c_str.rs @@ -1061,17 +1061,10 @@ impl IntoStringError { } } -impl IntoStringError { - fn description(&self) -> &str { - "C string contained non-utf8 bytes" - } -} - #[stable(feature = "cstring_into", since = "1.7.0")] impl fmt::Display for IntoStringError { - #[allow(deprecated, deprecated_in_future)] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.description().fmt(f) + "C string contained non-utf8 bytes".fmt(f) } } @@ -1099,7 +1092,7 @@ impl From<&CStr> for CString { } } -#[stable(feature = "c_string_eq_c_str", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = 
"c_string_eq_c_str", since = "1.90.0")] impl PartialEq<CStr> for CString { #[inline] fn eq(&self, other: &CStr) -> bool { @@ -1112,7 +1105,7 @@ impl PartialEq<CStr> for CString { } } -#[stable(feature = "c_string_eq_c_str", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "c_string_eq_c_str", since = "1.90.0")] impl PartialEq<&CStr> for CString { #[inline] fn eq(&self, other: &&CStr) -> bool { @@ -1126,7 +1119,7 @@ impl PartialEq<&CStr> for CString { } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "c_string_eq_c_str", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "c_string_eq_c_str", since = "1.90.0")] impl PartialEq<Cow<'_, CStr>> for CString { #[inline] fn eq(&self, other: &Cow<'_, CStr>) -> bool { @@ -1221,7 +1214,7 @@ impl CStr { } } -#[stable(feature = "c_string_eq_c_str", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "c_string_eq_c_str", since = "1.90.0")] impl PartialEq<CString> for CStr { #[inline] fn eq(&self, other: &CString) -> bool { @@ -1235,7 +1228,7 @@ impl PartialEq<CString> for CStr { } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "c_string_eq_c_str", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "c_string_eq_c_str", since = "1.90.0")] impl PartialEq<Cow<'_, Self>> for CStr { #[inline] fn eq(&self, other: &Cow<'_, Self>) -> bool { @@ -1249,7 +1242,7 @@ impl PartialEq<Cow<'_, Self>> for CStr { } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "c_string_eq_c_str", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "c_string_eq_c_str", since = "1.90.0")] impl PartialEq<CStr> for Cow<'_, CStr> { #[inline] fn eq(&self, other: &CStr) -> bool { @@ -1263,7 +1256,7 @@ impl PartialEq<CStr> for Cow<'_, CStr> { } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "c_string_eq_c_str", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "c_string_eq_c_str", since = "1.90.0")] impl PartialEq<&CStr> for Cow<'_, CStr> { #[inline] fn eq(&self, other: &&CStr) -> bool { @@ -1277,7 +1270,7 @@ impl 
PartialEq<&CStr> for Cow<'_, CStr> { } #[cfg(not(no_global_oom_handling))] -#[stable(feature = "c_string_eq_c_str", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "c_string_eq_c_str", since = "1.90.0")] impl PartialEq<CString> for Cow<'_, CStr> { #[inline] fn eq(&self, other: &CString) -> bool { @@ -1291,23 +1284,13 @@ impl PartialEq<CString> for Cow<'_, CStr> { } #[stable(feature = "rust1", since = "1.0.0")] -impl core::error::Error for NulError { - #[allow(deprecated)] - fn description(&self) -> &str { - "nul byte found in data" - } -} +impl core::error::Error for NulError {} #[stable(feature = "cstring_from_vec_with_nul", since = "1.58.0")] impl core::error::Error for FromVecWithNulError {} #[stable(feature = "cstring_into", since = "1.7.0")] impl core::error::Error for IntoStringError { - #[allow(deprecated)] - fn description(&self) -> &str { - "C string contained non-utf8 bytes" - } - fn source(&self) -> Option<&(dyn core::error::Error + 'static)> { Some(&self.error) } diff --git a/library/alloc/src/fmt.rs b/library/alloc/src/fmt.rs index 30f42050ac8..82eaf7d8724 100644 --- a/library/alloc/src/fmt.rs +++ b/library/alloc/src/fmt.rs @@ -348,13 +348,13 @@ //! format := '{' [ argument ] [ ':' format_spec ] [ ws ] * '}' //! argument := integer | identifier //! -//! format_spec := [[fill]align][sign]['#']['0'][width]['.' precision]type +//! format_spec := [[fill]align][sign]['#']['0'][width]['.' precision][type] //! fill := character //! align := '<' | '^' | '>' //! sign := '+' | '-' //! width := count //! precision := count | '*' -//! type := '' | '?' | 'x?' | 'X?' | identifier +//! type := '?' | 'x?' | 'X?' | 'o' | 'x' | 'X' | 'p' | 'b' | 'e' | 'E' //! count := parameter | integer //! parameter := argument '$' //! 
``` diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index 6b6e4df4cba..cba1ce40f75 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -94,7 +94,6 @@ // tidy-alphabetical-start #![feature(alloc_layout_extra)] #![feature(allocator_api)] -#![feature(array_chunks)] #![feature(array_into_iter_constructors)] #![feature(array_windows)] #![feature(ascii_char)] @@ -103,14 +102,15 @@ #![feature(async_iterator)] #![feature(bstr)] #![feature(bstr_internals)] +#![feature(cast_maybe_uninit)] #![feature(char_internals)] #![feature(char_max_len)] #![feature(clone_to_uninit)] #![feature(coerce_unsized)] +#![feature(const_convert)] #![feature(const_default)] #![feature(const_eval_select)] #![feature(const_heap)] -#![feature(const_trait_impl)] #![feature(core_intrinsics)] #![feature(deprecated_suggestion)] #![feature(deref_pure_trait)] @@ -159,6 +159,7 @@ #![feature(unicode_internals)] #![feature(unsize)] #![feature(unwrap_infallible)] +#![feature(wtf8_internals)] // tidy-alphabetical-end // // Language features: @@ -167,6 +168,7 @@ #![feature(allow_internal_unstable)] #![feature(cfg_sanitize)] #![feature(const_precise_live_drops)] +#![feature(const_trait_impl)] #![feature(coroutine_trait)] #![feature(decl_macro)] #![feature(dropck_eyepatch)] @@ -232,6 +234,8 @@ pub mod sync; #[cfg(all(not(no_global_oom_handling), not(no_rc), not(no_sync)))] pub mod task; pub mod vec; +#[cfg(all(not(no_rc), not(no_sync), not(no_global_oom_handling)))] +pub mod wtf8; #[doc(hidden)] #[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] diff --git a/library/alloc/src/raw_vec/mod.rs b/library/alloc/src/raw_vec/mod.rs index 3e006a2d1bd..b0027e964e4 100644 --- a/library/alloc/src/raw_vec/mod.rs +++ b/library/alloc/src/raw_vec/mod.rs @@ -155,7 +155,7 @@ impl RawVecInner<Global> { } // Tiny Vecs are dumb. 
Skip to: -// - 8 if the element size is 1, because any heap allocators is likely +// - 8 if the element size is 1, because any heap allocator is likely // to round up a request of less than 8 bytes to at least 8 bytes. // - 4 if elements are moderate-sized (<= 1 KiB). // - 1 otherwise, to avoid wasting too much space for very short Vecs. @@ -468,10 +468,6 @@ impl<A: Allocator> RawVecInner<A> { return Ok(Self::new_in(alloc, elem_layout.alignment())); } - if let Err(err) = alloc_guard(layout.size()) { - return Err(err); - } - let result = match init { AllocInit::Uninitialized => alloc.allocate(layout), #[cfg(not(no_global_oom_handling))] @@ -662,7 +658,7 @@ impl<A: Allocator> RawVecInner<A> { let new_layout = layout_array(cap, elem_layout)?; let ptr = finish_grow(new_layout, self.current_memory(elem_layout), &mut self.alloc)?; - // SAFETY: finish_grow would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items + // SAFETY: layout_array would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items unsafe { self.set_ptr_and_cap(ptr, cap) }; Ok(()) @@ -684,7 +680,7 @@ impl<A: Allocator> RawVecInner<A> { let new_layout = layout_array(cap, elem_layout)?; let ptr = finish_grow(new_layout, self.current_memory(elem_layout), &mut self.alloc)?; - // SAFETY: finish_grow would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items + // SAFETY: layout_array would have resulted in a capacity overflow if we tried to allocate more than `isize::MAX` items unsafe { self.set_ptr_and_cap(ptr, cap); } @@ -761,7 +757,7 @@ impl<A: Allocator> RawVecInner<A> { } // not marked inline(never) since we want optimizers to be able to observe the specifics of this -// function, see tests/codegen/vec-reserve-extend.rs. +// function, see tests/codegen-llvm/vec-reserve-extend.rs. 
#[cold] fn finish_grow<A>( new_layout: Layout, @@ -771,8 +767,6 @@ fn finish_grow<A>( where A: Allocator, { - alloc_guard(new_layout.size())?; - let memory = if let Some((ptr, old_layout)) = current_memory { debug_assert_eq!(old_layout.align(), new_layout.align()); unsafe { @@ -799,23 +793,6 @@ fn handle_error(e: TryReserveError) -> ! { } } -// We need to guarantee the following: -// * We don't ever allocate `> isize::MAX` byte-size objects. -// * We don't overflow `usize::MAX` and actually allocate too little. -// -// On 64-bit we just need to check for overflow since trying to allocate -// `> isize::MAX` bytes will surely fail. On 32-bit and 16-bit we need to add -// an extra guard for this in case we're running on a platform which can use -// all 4GB in user-space, e.g., PAE or x32. -#[inline] -fn alloc_guard(alloc_size: usize) -> Result<(), TryReserveError> { - if usize::BITS < 64 && alloc_size > isize::MAX as usize { - Err(CapacityOverflow.into()) - } else { - Ok(()) - } -} - #[inline] fn layout_array(cap: usize, elem_layout: Layout) -> Result<Layout, TryReserveError> { elem_layout.repeat(cap).map(|(layout, _pad)| layout).map_err(|_| CapacityOverflow.into()) diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 0b5c9240db0..fcb466778a3 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -2345,7 +2345,7 @@ impl<T: Default> Default for Rc<T> { /// assert_eq!(*x, 0); /// ``` #[inline] - fn default() -> Rc<T> { + fn default() -> Self { unsafe { Self::from_inner( Box::leak(Box::write( @@ -2361,7 +2361,7 @@ impl<T: Default> Default for Rc<T> { #[cfg(not(no_global_oom_handling))] #[stable(feature = "more_rc_default_impls", since = "1.80.0")] impl Default for Rc<str> { - /// Creates an empty str inside an Rc + /// Creates an empty `str` inside an `Rc`. /// /// This may or may not share an allocation with other Rcs on the same thread. 
#[inline] @@ -2375,7 +2375,7 @@ impl Default for Rc<str> { #[cfg(not(no_global_oom_handling))] #[stable(feature = "more_rc_default_impls", since = "1.80.0")] impl<T> Default for Rc<[T]> { - /// Creates an empty `[T]` inside an Rc + /// Creates an empty `[T]` inside an `Rc`. /// /// This may or may not share an allocation with other Rcs on the same thread. #[inline] @@ -2385,6 +2385,19 @@ impl<T> Default for Rc<[T]> { } } +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "pin_default_impls", since = "CURRENT_RUSTC_VERSION")] +impl<T> Default for Pin<Rc<T>> +where + T: ?Sized, + Rc<T>: Default, +{ + #[inline] + fn default() -> Self { + unsafe { Pin::new_unchecked(Rc::<T>::default()) } + } +} + #[stable(feature = "rust1", since = "1.0.0")] trait RcEqIdent<T: ?Sized + PartialEq, A: Allocator> { fn eq(&self, other: &Rc<T, A>) -> bool; diff --git a/library/alloc/src/slice.rs b/library/alloc/src/slice.rs index b4da56578c8..ce9f967cc38 100644 --- a/library/alloc/src/slice.rs +++ b/library/alloc/src/slice.rs @@ -16,10 +16,6 @@ use core::cmp::Ordering::{self, Less}; use core::mem::MaybeUninit; #[cfg(not(no_global_oom_handling))] use core::ptr; -#[unstable(feature = "array_chunks", issue = "74985")] -pub use core::slice::ArrayChunks; -#[unstable(feature = "array_chunks", issue = "74985")] -pub use core::slice::ArrayChunksMut; #[unstable(feature = "array_windows", issue = "75027")] pub use core::slice::ArrayWindows; #[stable(feature = "inherent_ascii_escape", since = "1.60.0")] diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs index 22cdd8ecde0..e772ac25a95 100644 --- a/library/alloc/src/str.rs +++ b/library/alloc/src/str.rs @@ -418,9 +418,8 @@ impl str { } fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool { - use core::unicode::{Case_Ignorable, Cased}; - match iter.skip_while(|&c| Case_Ignorable(c)).next() { - Some(c) => Cased(c), + match iter.skip_while(|&c| c.is_case_ignorable()).next() { + Some(c) => c.is_cased(), None => false, 
} } diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index a189c00a6b6..e669c4708ad 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -265,12 +265,12 @@ use crate::vec::{self, Vec}; /// You can look at these with the [`as_ptr`], [`len`], and [`capacity`] /// methods: /// +// FIXME Update this when vec_into_raw_parts is stabilized /// ``` /// use std::mem; /// /// let story = String::from("Once upon a time..."); /// -// FIXME Update this when vec_into_raw_parts is stabilized /// // Prevent automatically dropping the String's data /// let mut story = mem::ManuallyDrop::new(story); /// @@ -787,12 +787,12 @@ impl String { #[cfg(not(no_global_oom_handling))] #[unstable(feature = "str_from_utf16_endian", issue = "116258")] pub fn from_utf16le(v: &[u8]) -> Result<String, FromUtf16Error> { - if v.len() % 2 != 0 { + let (chunks, []) = v.as_chunks::<2>() else { return Err(FromUtf16Error(())); - } + }; match (cfg!(target_endian = "little"), unsafe { v.align_to::<u16>() }) { (true, ([], v, [])) => Self::from_utf16(v), - _ => char::decode_utf16(v.array_chunks::<2>().copied().map(u16::from_le_bytes)) + _ => char::decode_utf16(chunks.iter().copied().map(u16::from_le_bytes)) .collect::<Result<_, _>>() .map_err(|_| FromUtf16Error(())), } @@ -830,11 +830,11 @@ impl String { (true, ([], v, [])) => Self::from_utf16_lossy(v), (true, ([], v, [_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}", _ => { - let mut iter = v.array_chunks::<2>(); - let string = char::decode_utf16(iter.by_ref().copied().map(u16::from_le_bytes)) + let (chunks, remainder) = v.as_chunks::<2>(); + let string = char::decode_utf16(chunks.iter().copied().map(u16::from_le_bytes)) .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER)) .collect(); - if iter.remainder().is_empty() { string } else { string + "\u{FFFD}" } + if remainder.is_empty() { string } else { string + "\u{FFFD}" } } } } @@ -862,12 +862,12 @@ impl String { #[cfg(not(no_global_oom_handling))] 
#[unstable(feature = "str_from_utf16_endian", issue = "116258")] pub fn from_utf16be(v: &[u8]) -> Result<String, FromUtf16Error> { - if v.len() % 2 != 0 { + let (chunks, []) = v.as_chunks::<2>() else { return Err(FromUtf16Error(())); - } + }; match (cfg!(target_endian = "big"), unsafe { v.align_to::<u16>() }) { (true, ([], v, [])) => Self::from_utf16(v), - _ => char::decode_utf16(v.array_chunks::<2>().copied().map(u16::from_be_bytes)) + _ => char::decode_utf16(chunks.iter().copied().map(u16::from_be_bytes)) .collect::<Result<_, _>>() .map_err(|_| FromUtf16Error(())), } @@ -905,11 +905,11 @@ impl String { (true, ([], v, [])) => Self::from_utf16_lossy(v), (true, ([], v, [_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}", _ => { - let mut iter = v.array_chunks::<2>(); - let string = char::decode_utf16(iter.by_ref().copied().map(u16::from_be_bytes)) + let (chunks, remainder) = v.as_chunks::<2>(); + let string = char::decode_utf16(chunks.iter().copied().map(u16::from_be_bytes)) .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER)) .collect(); - if iter.remainder().is_empty() { string } else { string + "\u{FFFD}" } + if remainder.is_empty() { string } else { string + "\u{FFFD}" } } } } @@ -970,13 +970,13 @@ impl String { /// /// # Examples /// + // FIXME Update this when vec_into_raw_parts is stabilized /// ``` /// use std::mem; /// /// unsafe { /// let s = String::from("hello"); /// - // FIXME Update this when vec_into_raw_parts is stabilized /// // Prevent automatically dropping the String's data /// let mut s = mem::ManuallyDrop::new(s); /// @@ -1117,8 +1117,8 @@ impl String { /// /// # Panics /// - /// Panics if the starting point or end point do not lie on a [`char`] - /// boundary, or if they're out of bounds. + /// Panics if the range has `start_bound > end_bound`, or, if the range is + /// bounded on either end and does not lie on a [`char`] boundary. 
/// /// # Examples /// @@ -1939,8 +1939,8 @@ impl String { /// /// # Panics /// - /// Panics if the starting point or end point do not lie on a [`char`] - /// boundary, or if they're out of bounds. + /// Panics if the range has `start_bound > end_bound`, or, if the range is + /// bounded on either end and does not lie on a [`char`] boundary. /// /// # Leaking /// @@ -2050,8 +2050,8 @@ impl String { /// /// # Panics /// - /// Panics if the starting point or end point do not lie on a [`char`] - /// boundary, or if they're out of bounds. + /// Panics if the range has `start_bound > end_bound`, or, if the range is + /// bounded on either end and does not lie on a [`char`] boundary. /// /// # Examples /// @@ -2285,20 +2285,10 @@ impl fmt::Display for FromUtf16Error { } #[stable(feature = "rust1", since = "1.0.0")] -impl Error for FromUtf8Error { - #[allow(deprecated)] - fn description(&self) -> &str { - "invalid utf-8" - } -} +impl Error for FromUtf8Error {} #[stable(feature = "rust1", since = "1.0.0")] -impl Error for FromUtf16Error { - #[allow(deprecated)] - fn description(&self) -> &str { - "invalid utf-16" - } -} +impl Error for FromUtf16Error {} #[cfg(not(no_global_oom_handling))] #[stable(feature = "rust1", since = "1.0.0")] @@ -2949,68 +2939,41 @@ impl SpecToString for i8 { } } -// Generic/generated code can sometimes have multiple, nested references -// for strings, including `&&&str`s that would never be written -// by hand. This macro generates twelve layers of nested `&`-impl -// for primitive strings. -#[cfg(not(no_global_oom_handling))] -macro_rules! to_string_str_wrap_in_ref { - {x $($x:ident)*} => { - &to_string_str_wrap_in_ref! { $($x)* } - }; - {} => { str }; -} -#[cfg(not(no_global_oom_handling))] -macro_rules! to_string_expr_wrap_in_deref { - {$self:expr ; x $($x:ident)*} => { - *(to_string_expr_wrap_in_deref! { $self ; $($x)* }) - }; - {$self:expr ;} => { $self }; -} #[cfg(not(no_global_oom_handling))] macro_rules! 
to_string_str { - {$($($x:ident)*),+} => { + {$($type:ty,)*} => { $( - impl SpecToString for to_string_str_wrap_in_ref!($($x)*) { + impl SpecToString for $type { #[inline] fn spec_to_string(&self) -> String { - String::from(to_string_expr_wrap_in_deref!(self ; $($x)*)) + let s: &str = self; + String::from(s) } } - )+ + )* }; } #[cfg(not(no_global_oom_handling))] to_string_str! { - x x x x x x x x x x x x, - x x x x x x x x x x x, - x x x x x x x x x x, - x x x x x x x x x, - x x x x x x x x, - x x x x x x x, - x x x x x x, - x x x x x, - x x x x, - x x x, - x x, - x, -} - -#[cfg(not(no_global_oom_handling))] -impl SpecToString for Cow<'_, str> { - #[inline] - fn spec_to_string(&self) -> String { - self[..].to_owned() - } -} - -#[cfg(not(no_global_oom_handling))] -impl SpecToString for String { - #[inline] - fn spec_to_string(&self) -> String { - self.to_owned() - } + Cow<'_, str>, + String, + // Generic/generated code can sometimes have multiple, nested references + // for strings, including `&&&str`s that would never be written + // by hand. 
+ &&&&&&&&&&&&str, + &&&&&&&&&&&str, + &&&&&&&&&&str, + &&&&&&&&&str, + &&&&&&&&str, + &&&&&&&str, + &&&&&&str, + &&&&&str, + &&&&str, + &&&str, + &&str, + &str, + str, } #[cfg(not(no_global_oom_handling))] diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index 4090a04f21a..32396cccb8f 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -3644,6 +3644,19 @@ impl<T> Default for Arc<[T]> { } } +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "pin_default_impls", since = "CURRENT_RUSTC_VERSION")] +impl<T> Default for Pin<Arc<T>> +where + T: ?Sized, + Arc<T>: Default, +{ + #[inline] + fn default() -> Self { + unsafe { Pin::new_unchecked(Arc::<T>::default()) } + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl<T: ?Sized + Hash, A: Allocator> Hash for Arc<T, A> { fn hash<H: Hasher>(&self, state: &mut H) { @@ -4090,11 +4103,6 @@ impl<T: ?Sized, A: Allocator> Drop for UniqueArcUninit<T, A> { #[stable(feature = "arc_error", since = "1.52.0")] impl<T: core::error::Error + ?Sized> core::error::Error for Arc<T> { - #[allow(deprecated, deprecated_in_future)] - fn description(&self) -> &str { - core::error::Error::description(&**self) - } - #[allow(deprecated)] fn cause(&self) -> Option<&dyn core::error::Error> { core::error::Error::cause(&**self) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 9856e9c18ec..10c7ee4f6c8 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -49,7 +49,27 @@ //! v[1] = v[1] + 5; //! ``` //! +//! # Memory layout +//! +//! When the type is non-zero-sized and the capacity is nonzero, [`Vec`] uses the [`Global`] +//! allocator for its allocation. It is valid to convert both ways between such a [`Vec`] and a raw +//! pointer allocated with the [`Global`] allocator, provided that the [`Layout`] used with the +//! allocator is correct for a sequence of `capacity` elements of the type, and the first `len` +//! 
values pointed to by the raw pointer are valid. More precisely, a `ptr: *mut T` that has been +//! allocated with the [`Global`] allocator with [`Layout::array::<T>(capacity)`][Layout::array] may +//! be converted into a vec using +//! [`Vec::<T>::from_raw_parts(ptr, len, capacity)`](Vec::from_raw_parts). Conversely, the memory +//! backing a `value: *mut T` obtained from [`Vec::<T>::as_mut_ptr`] may be deallocated using the +//! [`Global`] allocator with the same layout. +//! +//! For zero-sized types (ZSTs), or when the capacity is zero, the `Vec` pointer must be non-null +//! and sufficiently aligned. The recommended way to build a `Vec` of ZSTs if [`vec!`] cannot be +//! used is to use [`ptr::NonNull::dangling`]. +//! //! [`push`]: Vec::push +//! [`ptr::NonNull::dangling`]: NonNull::dangling +//! [`Layout`]: crate::alloc::Layout +//! [Layout::array]: crate::alloc::Layout::array #![stable(feature = "rust1", since = "1.0.0")] @@ -523,18 +543,23 @@ impl<T> Vec<T> { /// This is highly unsafe, due to the number of invariants that aren't /// checked: /// - /// * `ptr` must have been allocated using the global allocator, such as via - /// the [`alloc::alloc`] function. - /// * `T` needs to have the same alignment as what `ptr` was allocated with. + /// * If `T` is not a zero-sized type and the capacity is nonzero, `ptr` must have + /// been allocated using the global allocator, such as via the [`alloc::alloc`] + /// function. If `T` is a zero-sized type or the capacity is zero, `ptr` need + /// only be non-null and aligned. + /// * `T` needs to have the same alignment as what `ptr` was allocated with, + /// if the pointer is required to be allocated. /// (`T` having a less strict alignment is not sufficient, the alignment really /// needs to be equal to satisfy the [`dealloc`] requirement that memory must be /// allocated and deallocated with the same layout.) - /// * The size of `T` times the `capacity` (ie. 
the allocated size in bytes) needs - /// to be the same size as the pointer was allocated with. (Because similar to - /// alignment, [`dealloc`] must be called with the same layout `size`.) + /// * The size of `T` times the `capacity` (ie. the allocated size in bytes), if + /// nonzero, needs to be the same size as the pointer was allocated with. + /// (Because similar to alignment, [`dealloc`] must be called with the same + /// layout `size`.) /// * `length` needs to be less than or equal to `capacity`. /// * The first `length` values must be properly initialized values of type `T`. - /// * `capacity` needs to be the capacity that the pointer was allocated with. + /// * `capacity` needs to be the capacity that the pointer was allocated with, + /// if the pointer is required to be allocated. /// * The allocated size in bytes must be no larger than `isize::MAX`. /// See the safety documentation of [`pointer::offset`]. /// @@ -566,13 +591,13 @@ impl<T> Vec<T> { /// /// # Examples /// + // FIXME Update this when vec_into_raw_parts is stabilized /// ``` /// use std::ptr; /// use std::mem; /// /// let v = vec![1, 2, 3]; /// - // FIXME Update this when vec_into_raw_parts is stabilized /// // Prevent running `v`'s destructor so we are in complete control /// // of the allocation. /// let mut v = mem::ManuallyDrop::new(v); @@ -674,6 +699,7 @@ impl<T> Vec<T> { /// /// # Examples /// + // FIXME Update this when vec_into_raw_parts is stabilized /// ``` /// #![feature(box_vec_non_null)] /// @@ -682,7 +708,6 @@ impl<T> Vec<T> { /// /// let v = vec![1, 2, 3]; /// - // FIXME Update this when vec_into_raw_parts is stabilized /// // Prevent running `v`'s destructor so we are in complete control /// // of the allocation. /// let mut v = mem::ManuallyDrop::new(v); @@ -735,33 +760,6 @@ impl<T> Vec<T> { unsafe { Self::from_parts_in(ptr, length, capacity, Global) } } - /// Returns a mutable reference to the last item in the vector, or - /// `None` if it is empty. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// #![feature(vec_peek_mut)] - /// let mut vec = Vec::new(); - /// assert!(vec.peek_mut().is_none()); - /// - /// vec.push(1); - /// vec.push(5); - /// vec.push(2); - /// assert_eq!(vec.last(), Some(&2)); - /// if let Some(mut val) = vec.peek_mut() { - /// *val = 0; - /// } - /// assert_eq!(vec.last(), Some(&0)); - /// ``` - #[inline] - #[unstable(feature = "vec_peek_mut", issue = "122742")] - pub fn peek_mut(&mut self) -> Option<PeekMut<'_, T>> { - PeekMut::new(self) - } - /// Decomposes a `Vec<T>` into its raw components: `(pointer, length, capacity)`. /// /// Returns the raw pointer to the underlying data, the length of @@ -770,12 +768,16 @@ impl<T> Vec<T> { /// order as the arguments to [`from_raw_parts`]. /// /// After calling this function, the caller is responsible for the - /// memory previously managed by the `Vec`. The only way to do - /// this is to convert the raw pointer, length, and capacity back - /// into a `Vec` with the [`from_raw_parts`] function, allowing - /// the destructor to perform the cleanup. + /// memory previously managed by the `Vec`. Most often, one does + /// this by converting the raw pointer, length, and capacity back + /// into a `Vec` with the [`from_raw_parts`] function; more generally, + /// if `T` is non-zero-sized and the capacity is nonzero, one may use + /// any method that calls [`dealloc`] with a layout of + /// `Layout::array::<T>(capacity)`; if `T` is zero-sized or the + /// capacity is zero, nothing needs to be done. 
/// /// [`from_raw_parts`]: Vec::from_raw_parts + /// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc /// /// # Examples /// @@ -994,6 +996,7 @@ impl<T, A: Allocator> Vec<T, A> { /// /// # Examples /// + // FIXME Update this when vec_into_raw_parts is stabilized /// ``` /// #![feature(allocator_api)] /// @@ -1007,7 +1010,6 @@ impl<T, A: Allocator> Vec<T, A> { /// v.push(2); /// v.push(3); /// - // FIXME Update this when vec_into_raw_parts is stabilized /// // Prevent running `v`'s destructor so we are in complete control /// // of the allocation. /// let mut v = mem::ManuallyDrop::new(v); @@ -1114,6 +1116,7 @@ impl<T, A: Allocator> Vec<T, A> { /// /// # Examples /// + // FIXME Update this when vec_into_raw_parts is stabilized /// ``` /// #![feature(allocator_api, box_vec_non_null)] /// @@ -1127,7 +1130,6 @@ impl<T, A: Allocator> Vec<T, A> { /// v.push(2); /// v.push(3); /// - // FIXME Update this when vec_into_raw_parts is stabilized /// // Prevent running `v`'s destructor so we are in complete control /// // of the allocation. /// let mut v = mem::ManuallyDrop::new(v); @@ -1755,6 +1757,12 @@ impl<T, A: Allocator> Vec<T, A> { /// may still invalidate this pointer. /// See the second example below for how this guarantee can be used. /// + /// The method also guarantees that, as long as `T` is not zero-sized and the capacity is + /// nonzero, the pointer may be passed into [`dealloc`] with a layout of + /// `Layout::array::<T>(capacity)` in order to deallocate the backing memory. If this is done, + /// be careful not to run the destructor of the `Vec`, as dropping it will result in + /// double-frees. Wrapping the `Vec` in a [`ManuallyDrop`] is the typical way to achieve this. 
+ /// /// # Examples /// /// ``` @@ -1787,9 +1795,24 @@ impl<T, A: Allocator> Vec<T, A> { /// } /// ``` /// + /// Deallocating a vector using [`Box`] (which uses [`dealloc`] internally): + /// + /// ``` + /// use std::mem::{ManuallyDrop, MaybeUninit}; + /// + /// let mut v = ManuallyDrop::new(vec![0, 1, 2]); + /// let ptr = v.as_mut_ptr(); + /// let capacity = v.capacity(); + /// let slice_ptr: *mut [MaybeUninit<i32>] = + /// std::ptr::slice_from_raw_parts_mut(ptr.cast(), capacity); + /// drop(unsafe { Box::from_raw(slice_ptr) }); + /// ``` + /// /// [`as_mut_ptr`]: Vec::as_mut_ptr /// [`as_ptr`]: Vec::as_ptr /// [`as_non_null`]: Vec::as_non_null + /// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc + /// [`ManuallyDrop`]: core::mem::ManuallyDrop #[stable(feature = "vec_as_ptr", since = "1.37.0")] #[rustc_const_stable(feature = "const_vec_string_slice", since = "1.87.0")] #[rustc_never_returns_null_ptr] @@ -2046,6 +2069,38 @@ impl<T, A: Allocator> Vec<T, A> { #[stable(feature = "rust1", since = "1.0.0")] #[track_caller] pub fn insert(&mut self, index: usize, element: T) { + let _ = self.insert_mut(index, element); + } + + /// Inserts an element at position `index` within the vector, shifting all + /// elements after it to the right, and returning a reference to the new + /// element. + /// + /// # Panics + /// + /// Panics if `index > len`. + /// + /// # Examples + /// + /// ``` + /// #![feature(push_mut)] + /// let mut vec = vec![1, 3, 5, 9]; + /// let x = vec.insert_mut(3, 6); + /// *x += 1; + /// assert_eq!(vec, [1, 3, 5, 7, 9]); + /// ``` + /// + /// # Time complexity + /// + /// Takes *O*([`Vec::len`]) time. All items after the insertion index must be + /// shifted to the right. In the worst case, all elements are shifted when + /// the insertion index is 0. 
+ #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "push_mut", issue = "135974")] + #[track_caller] + #[must_use = "if you don't need a reference to the value, use `Vec::insert` instead"] + pub fn insert_mut(&mut self, index: usize, element: T) -> &mut T { #[cold] #[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))] #[track_caller] @@ -2067,8 +2122,8 @@ impl<T, A: Allocator> Vec<T, A> { unsafe { // infallible // The spot to put the new value + let p = self.as_mut_ptr().add(index); { - let p = self.as_mut_ptr().add(index); if index < len { // Shift everything over to make space. (Duplicating the // `index`th element into two consecutive places.) @@ -2079,6 +2134,7 @@ impl<T, A: Allocator> Vec<T, A> { ptr::write(p, element); } self.set_len(len + 1); + &mut *p } } @@ -2486,18 +2542,7 @@ impl<T, A: Allocator> Vec<T, A> { #[rustc_confusables("push_back", "put", "append")] #[track_caller] pub fn push(&mut self, value: T) { - // Inform codegen that the length does not change across grow_one(). - let len = self.len; - // This will panic or abort if we would allocate > isize::MAX bytes - // or if the length increment would overflow for zero-sized types. - if len == self.buf.capacity() { - self.buf.grow_one(); - } - unsafe { - let end = self.as_mut_ptr().add(len); - ptr::write(end, value); - self.len = len + 1; - } + let _ = self.push_mut(value); } /// Appends an element if there is sufficient spare capacity, otherwise an error is returned @@ -2538,6 +2583,77 @@ impl<T, A: Allocator> Vec<T, A> { #[inline] #[unstable(feature = "vec_push_within_capacity", issue = "100486")] pub fn push_within_capacity(&mut self, value: T) -> Result<(), T> { + self.push_mut_within_capacity(value).map(|_| ()) + } + + /// Appends an element to the back of a collection, returning a reference to it. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` _bytes_. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(push_mut)] + /// + /// + /// let mut vec = vec![1, 2]; + /// let last = vec.push_mut(3); + /// assert_eq!(*last, 3); + /// assert_eq!(vec, [1, 2, 3]); + /// + /// let last = vec.push_mut(3); + /// *last += 1; + /// assert_eq!(vec, [1, 2, 3, 4]); + /// ``` + /// + /// # Time complexity + /// + /// Takes amortized *O*(1) time. If the vector's length would exceed its + /// capacity after the push, *O*(*capacity*) time is taken to copy the + /// vector's elements to a larger allocation. This expensive operation is + /// offset by the *capacity* *O*(1) insertions it allows. + #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "push_mut", issue = "135974")] + #[track_caller] + #[must_use = "if you don't need a reference to the value, use `Vec::push` instead"] + pub fn push_mut(&mut self, value: T) -> &mut T { + // Inform codegen that the length does not change across grow_one(). + let len = self.len; + // This will panic or abort if we would allocate > isize::MAX bytes + // or if the length increment would overflow for zero-sized types. + if len == self.buf.capacity() { + self.buf.grow_one(); + } + unsafe { + let end = self.as_mut_ptr().add(len); + ptr::write(end, value); + self.len = len + 1; + // SAFETY: We just wrote a value to the pointer that will live the lifetime of the reference. + &mut *end + } + } + + /// Appends an element and returns a reference to it if there is sufficient spare capacity, + /// otherwise an error is returned with the element. + /// + /// Unlike [`push_mut`] this method will not reallocate when there's insufficient capacity. + /// The caller should use [`reserve`] or [`try_reserve`] to ensure that there is enough capacity. + /// + /// [`push_mut`]: Vec::push_mut + /// [`reserve`]: Vec::reserve + /// [`try_reserve`]: Vec::try_reserve + /// + /// # Time complexity + /// + /// Takes *O*(1) time. 
+ #[unstable(feature = "push_mut", issue = "135974")] + // #[unstable(feature = "vec_push_within_capacity", issue = "100486")] + #[inline] + #[must_use = "if you don't need a reference to the value, use `Vec::push_within_capacity` instead"] + pub fn push_mut_within_capacity(&mut self, value: T) -> Result<&mut T, T> { if self.len == self.buf.capacity() { return Err(value); } @@ -2545,8 +2661,9 @@ impl<T, A: Allocator> Vec<T, A> { let end = self.as_mut_ptr().add(self.len); ptr::write(end, value); self.len += 1; + // SAFETY: We just wrote a value to the pointer that will live the lifetime of the reference. + Ok(&mut *end) } - Ok(()) } /// Removes the last element from a vector and returns it, or [`None`] if it @@ -2603,6 +2720,33 @@ impl<T, A: Allocator> Vec<T, A> { if predicate(last) { self.pop() } else { None } } + /// Returns a mutable reference to the last item in the vector, or + /// `None` if it is empty. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(vec_peek_mut)] + /// let mut vec = Vec::new(); + /// assert!(vec.peek_mut().is_none()); + /// + /// vec.push(1); + /// vec.push(5); + /// vec.push(2); + /// assert_eq!(vec.last(), Some(&2)); + /// if let Some(mut val) = vec.peek_mut() { + /// *val = 0; + /// } + /// assert_eq!(vec.last(), Some(&0)); + /// ``` + #[inline] + #[unstable(feature = "vec_peek_mut", issue = "122742")] + pub fn peek_mut(&mut self) -> Option<PeekMut<'_, T, A>> { + PeekMut::new(self) + } + /// Moves all the elements of `other` into `self`, leaving `other` empty. /// /// # Panics @@ -2652,8 +2796,8 @@ impl<T, A: Allocator> Vec<T, A> { /// /// # Panics /// - /// Panics if the starting point is greater than the end point or if - /// the end point is greater than the length of the vector. + /// Panics if the range has `start_bound > end_bound`, or, if the range is + /// bounded on either end and past the length of the vector. 
/// /// # Leaking /// @@ -3032,7 +3176,7 @@ impl<T, A: Allocator> Vec<T, A> { // - but the allocation extends out to `self.buf.capacity()` elements, possibly // uninitialized let spare_ptr = unsafe { ptr.add(self.len) }; - let spare_ptr = spare_ptr.cast::<MaybeUninit<T>>(); + let spare_ptr = spare_ptr.cast_uninit(); let spare_len = self.buf.capacity() - self.len; // SAFETY: @@ -3716,8 +3860,8 @@ impl<T, A: Allocator> Vec<T, A> { /// /// # Panics /// - /// Panics if the starting point is greater than the end point or if - /// the end point is greater than the length of the vector. + /// Panics if the range has `start_bound > end_bound`, or, if the range is + /// bounded on either end and past the length of the vector. /// /// # Examples /// @@ -3778,8 +3922,8 @@ impl<T, A: Allocator> Vec<T, A> { /// while i < vec.len() - end_items { /// if some_predicate(&mut vec[i]) { /// let val = vec.remove(i); - /// # extracted.push(val); /// // your code here + /// # extracted.push(val); /// } else { /// i += 1; /// } diff --git a/library/alloc/src/vec/peek_mut.rs b/library/alloc/src/vec/peek_mut.rs index c0dd941ed39..979bcaa1111 100644 --- a/library/alloc/src/vec/peek_mut.rs +++ b/library/alloc/src/vec/peek_mut.rs @@ -1,6 +1,7 @@ use core::ops::{Deref, DerefMut}; use super::Vec; +use crate::alloc::{Allocator, Global}; use crate::fmt; /// Structure wrapping a mutable reference to the last item in a @@ -11,42 +12,47 @@ use crate::fmt; /// /// [`peek_mut`]: Vec::peek_mut #[unstable(feature = "vec_peek_mut", issue = "122742")] -pub struct PeekMut<'a, T> { - vec: &'a mut Vec<T>, +pub struct PeekMut< + 'a, + T, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +> { + vec: &'a mut Vec<T, A>, } #[unstable(feature = "vec_peek_mut", issue = "122742")] -impl<T: fmt::Debug> fmt::Debug for PeekMut<'_, T> { +impl<T: fmt::Debug, A: Allocator> fmt::Debug for PeekMut<'_, T, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 
f.debug_tuple("PeekMut").field(self.deref()).finish() } } -impl<'a, T> PeekMut<'a, T> { - pub(crate) fn new(vec: &'a mut Vec<T>) -> Option<Self> { +impl<'a, T, A: Allocator> PeekMut<'a, T, A> { + pub(super) fn new(vec: &'a mut Vec<T, A>) -> Option<Self> { if vec.is_empty() { None } else { Some(Self { vec }) } } /// Removes the peeked value from the vector and returns it. #[unstable(feature = "vec_peek_mut", issue = "122742")] - pub fn pop(self) -> T { + pub fn pop(this: Self) -> T { // SAFETY: PeekMut is only constructed if the vec is non-empty - unsafe { self.vec.pop().unwrap_unchecked() } + unsafe { this.vec.pop().unwrap_unchecked() } } } #[unstable(feature = "vec_peek_mut", issue = "122742")] -impl<'a, T> Deref for PeekMut<'a, T> { +impl<'a, T, A: Allocator> Deref for PeekMut<'a, T, A> { type Target = T; fn deref(&self) -> &Self::Target { + let idx = self.vec.len() - 1; // SAFETY: PeekMut is only constructed if the vec is non-empty - unsafe { self.vec.get_unchecked(self.vec.len() - 1) } + unsafe { self.vec.get_unchecked(idx) } } } #[unstable(feature = "vec_peek_mut", issue = "122742")] -impl<'a, T> DerefMut for PeekMut<'a, T> { +impl<'a, T, A: Allocator> DerefMut for PeekMut<'a, T, A> { fn deref_mut(&mut self) -> &mut Self::Target { let idx = self.vec.len() - 1; // SAFETY: PeekMut is only constructed if the vec is non-empty diff --git a/library/alloc/src/wtf8/mod.rs b/library/alloc/src/wtf8/mod.rs new file mode 100644 index 00000000000..047994adc44 --- /dev/null +++ b/library/alloc/src/wtf8/mod.rs @@ -0,0 +1,562 @@ +//! Heap-allocated counterpart to core `wtf8` module. 
+#![unstable( + feature = "wtf8_internals", + issue = "none", + reason = "this is internal code for representing OsStr on some platforms and not a public API" +)] +// rustdoc bug: doc(hidden) on the module won't stop types in the module from showing up in trait +// implementations, so, we'll have to add more doc(hidden)s anyway +#![doc(hidden)] + +// Note: This module is also included in the alloctests crate using #[path] to +// run the tests. See the comment there for an explanation why this is the case. + +#[cfg(test)] +mod tests; + +use core::char::{MAX_LEN_UTF8, encode_utf8_raw}; +use core::hash::{Hash, Hasher}; +pub use core::wtf8::{CodePoint, Wtf8}; +#[cfg(not(test))] +pub use core::wtf8::{EncodeWide, Wtf8CodePoints}; +use core::{fmt, mem, ops, str}; + +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +#[cfg(not(test))] +use crate::rc::Rc; +use crate::string::String; +#[cfg(all(not(test), target_has_atomic = "ptr"))] +use crate::sync::Arc; +use crate::vec::Vec; + +/// An owned, growable string of well-formed WTF-8 data. +/// +/// Similar to `String`, but can additionally contain surrogate code points +/// if they’re not in a surrogate pair. +#[derive(Eq, PartialEq, Ord, PartialOrd, Clone)] +#[doc(hidden)] +pub struct Wtf8Buf { + bytes: Vec<u8>, + + /// Do we know that `bytes` holds a valid UTF-8 encoding? We can easily + /// know this if we're constructed from a `String` or `&str`. + /// + /// It is possible for `bytes` to have valid UTF-8 without this being + /// set, such as when we're concatenating `&Wtf8`'s and surrogates become + /// paired, as we don't bother to rescan the entire string. 
+ is_known_utf8: bool, +} + +impl ops::Deref for Wtf8Buf { + type Target = Wtf8; + + fn deref(&self) -> &Wtf8 { + self.as_slice() + } +} + +impl ops::DerefMut for Wtf8Buf { + fn deref_mut(&mut self) -> &mut Wtf8 { + self.as_mut_slice() + } +} + +/// Formats the string in double quotes, with characters escaped according to +/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`, +/// where each `x` is a hexadecimal digit. +/// +/// For example, the code units [U+0061, U+D800, U+000A] are formatted as +/// `"a\u{D800}\n"`. +impl fmt::Debug for Wtf8Buf { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, formatter) + } +} + +/// Formats the string with unpaired surrogates substituted with the replacement +/// character, U+FFFD. +impl fmt::Display for Wtf8Buf { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(s) = self.as_known_utf8() { + fmt::Display::fmt(s, formatter) + } else { + fmt::Display::fmt(&**self, formatter) + } + } +} + +#[cfg_attr(test, allow(dead_code))] +impl Wtf8Buf { + /// Creates a new, empty WTF-8 string. + #[inline] + pub fn new() -> Wtf8Buf { + Wtf8Buf { bytes: Vec::new(), is_known_utf8: true } + } + + /// Creates a new, empty WTF-8 string with pre-allocated capacity for `capacity` bytes. + #[inline] + pub fn with_capacity(capacity: usize) -> Wtf8Buf { + Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true } + } + + /// Creates a WTF-8 string from a WTF-8 byte vec. + /// + /// Since the byte vec is not checked for valid WTF-8, this function is + /// marked unsafe. + #[inline] + pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf { + Wtf8Buf { bytes: value, is_known_utf8: false } + } + + /// Creates a WTF-8 string from a UTF-8 `String`. + /// + /// This takes ownership of the `String` and does not copy. + /// + /// Since WTF-8 is a superset of UTF-8, this always succeeds. 
+ #[inline] + pub const fn from_string(string: String) -> Wtf8Buf { + Wtf8Buf { bytes: string.into_bytes(), is_known_utf8: true } + } + + /// Creates a WTF-8 string from a UTF-8 `&str` slice. + /// + /// This copies the content of the slice. + /// + /// Since WTF-8 is a superset of UTF-8, this always succeeds. + #[inline] + pub fn from_str(s: &str) -> Wtf8Buf { + Wtf8Buf { bytes: s.as_bytes().to_vec(), is_known_utf8: true } + } + + pub fn clear(&mut self) { + self.bytes.clear(); + self.is_known_utf8 = true; + } + + /// Creates a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units. + /// + /// This is lossless: calling `.encode_wide()` on the resulting string + /// will always return the original code units. + pub fn from_wide(v: &[u16]) -> Wtf8Buf { + let mut string = Wtf8Buf::with_capacity(v.len()); + for item in char::decode_utf16(v.iter().cloned()) { + match item { + Ok(ch) => string.push_char(ch), + Err(surrogate) => { + let surrogate = surrogate.unpaired_surrogate(); + // Surrogates are known to be in the code point range. + let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) }; + // The string will now contain an unpaired surrogate. + string.is_known_utf8 = false; + // Skip the WTF-8 concatenation check, + // surrogate pairs are already decoded by decode_utf16 + unsafe { + string.push_code_point_unchecked(code_point); + } + } + } + } + string + } + + /// Appends the given `char` to the end of this string. + /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. + /// Copied from String::push. 
+ unsafe fn push_code_point_unchecked(&mut self, code_point: CodePoint) { + let mut bytes = [0; MAX_LEN_UTF8]; + let bytes = encode_utf8_raw(code_point.to_u32(), &mut bytes); + self.bytes.extend_from_slice(bytes) + } + + #[inline] + pub fn as_slice(&self) -> &Wtf8 { + unsafe { Wtf8::from_bytes_unchecked(&self.bytes) } + } + + #[inline] + pub fn as_mut_slice(&mut self) -> &mut Wtf8 { + // Safety: `Wtf8` doesn't expose any way to mutate the bytes that would + // cause them to change from well-formed UTF-8 to ill-formed UTF-8, + // which would break the assumptions of the `is_known_utf8` field. + unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) } + } + + /// Converts the string to UTF-8 without validation, if it was created from + /// valid UTF-8. + #[inline] + fn as_known_utf8(&self) -> Option<&str> { + if self.is_known_utf8 { + // SAFETY: The buffer is known to be valid UTF-8. + Some(unsafe { str::from_utf8_unchecked(self.as_bytes()) }) + } else { + None + } + } + + /// Reserves capacity for at least `additional` more bytes to be inserted + /// in the given `Wtf8Buf`. + /// The collection may reserve more space to avoid frequent reallocations. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.bytes.reserve(additional) + } + + /// Tries to reserve capacity for at least `additional` more bytes to be + /// inserted in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to + /// avoid frequent reallocations. After calling `try_reserve`, capacity will + /// be greater than or equal to `self.len() + additional`. Does nothing if + /// capacity is already sufficient. This method preserves the contents even + /// if an error occurs. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. 
+ #[inline] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.bytes.try_reserve(additional) + } + + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + self.bytes.reserve_exact(additional) + } + + /// Tries to reserve the minimum capacity for exactly `additional` more + /// bytes to be inserted in the given `Wtf8Buf`. After calling + /// `try_reserve_exact`, capacity will be greater than or equal to + /// `self.len() + additional` if it returns `Ok(())`. + /// Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the `Wtf8Buf` more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`try_reserve`] if future insertions are expected. + /// + /// [`try_reserve`]: Wtf8Buf::try_reserve + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + #[inline] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.bytes.try_reserve_exact(additional) + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + self.bytes.shrink_to_fit() + } + + #[inline] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.bytes.shrink_to(min_capacity) + } + + #[inline] + pub fn leak<'a>(self) -> &'a mut Wtf8 { + unsafe { Wtf8::from_mut_bytes_unchecked(self.bytes.leak()) } + } + + /// Returns the number of bytes that this string buffer can hold without reallocating. + #[inline] + pub fn capacity(&self) -> usize { + self.bytes.capacity() + } + + /// Append a UTF-8 slice at the end of the string. + #[inline] + pub fn push_str(&mut self, other: &str) { + self.bytes.extend_from_slice(other.as_bytes()) + } + + /// Append a WTF-8 slice at the end of the string. 
+ /// + /// This replaces newly paired surrogates at the boundary + /// with a supplementary code point, + /// like concatenating ill-formed UTF-16 strings effectively would. + #[inline] + pub fn push_wtf8(&mut self, other: &Wtf8) { + match ((&*self).final_lead_surrogate(), other.initial_trail_surrogate()) { + // Replace newly paired surrogates by a supplementary code point. + (Some(lead), Some(trail)) => { + let len_without_lead_surrogate = self.len() - 3; + self.bytes.truncate(len_without_lead_surrogate); + let other_without_trail_surrogate = &other.as_bytes()[3..]; + // 4 bytes for the supplementary code point + self.bytes.reserve(4 + other_without_trail_surrogate.len()); + self.push_char(decode_surrogate_pair(lead, trail)); + self.bytes.extend_from_slice(other_without_trail_surrogate); + } + _ => { + // If we'll be pushing a string containing a surrogate, we may + // no longer have UTF-8. + if self.is_known_utf8 && other.next_surrogate(0).is_some() { + self.is_known_utf8 = false; + } + + self.bytes.extend_from_slice(other.as_bytes()); + } + } + } + + /// Append a Unicode scalar value at the end of the string. + #[inline] + pub fn push_char(&mut self, c: char) { + // SAFETY: It's always safe to push a char. + unsafe { self.push_code_point_unchecked(CodePoint::from_char(c)) } + } + + /// Append a code point at the end of the string. + /// + /// This replaces newly paired surrogates at the boundary + /// with a supplementary code point, + /// like concatenating ill-formed UTF-16 strings effectively would. + #[inline] + pub fn push(&mut self, code_point: CodePoint) { + if let Some(trail) = code_point.to_trail_surrogate() { + if let Some(lead) = (&*self).final_lead_surrogate() { + let len_without_lead_surrogate = self.len() - 3; + self.bytes.truncate(len_without_lead_surrogate); + self.push_char(decode_surrogate_pair(lead, trail)); + return; + } + + // We're pushing a trailing surrogate. 
+ self.is_known_utf8 = false; + } else if code_point.to_lead_surrogate().is_some() { + // We're pushing a leading surrogate. + self.is_known_utf8 = false; + } + + // No newly paired surrogates at the boundary. + unsafe { self.push_code_point_unchecked(code_point) } + } + + /// Shortens a string to the specified length. + /// + /// # Panics + /// + /// Panics if `new_len` > current length, + /// or if `new_len` is not a code point boundary. + #[inline] + pub fn truncate(&mut self, new_len: usize) { + assert!(self.is_code_point_boundary(new_len)); + self.bytes.truncate(new_len) + } + + /// Consumes the WTF-8 string and tries to convert it to a vec of bytes. + #[inline] + pub fn into_bytes(self) -> Vec<u8> { + self.bytes + } + + /// Consumes the WTF-8 string and tries to convert it to UTF-8. + /// + /// This does not copy the data. + /// + /// If the contents are not well-formed UTF-8 + /// (that is, if the string contains surrogates), + /// the original WTF-8 string is returned instead. + pub fn into_string(self) -> Result<String, Wtf8Buf> { + if self.is_known_utf8 || self.next_surrogate(0).is_none() { + Ok(unsafe { String::from_utf8_unchecked(self.bytes) }) + } else { + Err(self) + } + } + + /// Consumes the WTF-8 string and converts it lossily to UTF-8. + /// + /// This does not copy the data (but may overwrite parts of it in place). + /// + /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”) + pub fn into_string_lossy(mut self) -> String { + if !self.is_known_utf8 { + let mut pos = 0; + while let Some((surrogate_pos, _)) = self.next_surrogate(pos) { + pos = surrogate_pos + 3; + // Surrogates and the replacement character are all 3 bytes, so + // they can substituted in-place. + self.bytes[surrogate_pos..pos].copy_from_slice("\u{FFFD}".as_bytes()); + } + } + unsafe { String::from_utf8_unchecked(self.bytes) } + } + + /// Converts this `Wtf8Buf` into a boxed `Wtf8`. 
+ #[inline] + pub fn into_box(self) -> Box<Wtf8> { + // SAFETY: relies on `Wtf8` being `repr(transparent)`. + unsafe { mem::transmute(self.bytes.into_boxed_slice()) } + } + + /// Converts a `Box<Wtf8>` into a `Wtf8Buf`. + pub fn from_box(boxed: Box<Wtf8>) -> Wtf8Buf { + let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) }; + Wtf8Buf { bytes: bytes.into_vec(), is_known_utf8: false } + } + + /// Provides plumbing to core `Vec::extend_from_slice`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[inline] + pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) { + self.bytes.extend_from_slice(other); + self.is_known_utf8 = false; + } +} + +/// Creates a new WTF-8 string from an iterator of code points. +/// +/// This replaces surrogate code point pairs with supplementary code points, +/// like concatenating ill-formed UTF-16 strings effectively would. +impl FromIterator<CodePoint> for Wtf8Buf { + fn from_iter<T: IntoIterator<Item = CodePoint>>(iter: T) -> Wtf8Buf { + let mut string = Wtf8Buf::new(); + string.extend(iter); + string + } +} + +/// Append code points from an iterator to the string. +/// +/// This replaces surrogate code point pairs with supplementary code points, +/// like concatenating ill-formed UTF-16 strings effectively would. +impl Extend<CodePoint> for Wtf8Buf { + fn extend<T: IntoIterator<Item = CodePoint>>(&mut self, iter: T) { + let iterator = iter.into_iter(); + let (low, _high) = iterator.size_hint(); + // Lower bound of one byte per code point (ASCII only) + self.bytes.reserve(low); + iterator.for_each(move |code_point| self.push(code_point)); + } + + #[inline] + fn extend_one(&mut self, code_point: CodePoint) { + self.push(code_point); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + // Lower bound of one byte per code point (ASCII only) + self.bytes.reserve(additional); + } +} + +/// Creates an owned `Wtf8Buf` from a borrowed `Wtf8`. 
+pub(super) fn to_owned(slice: &Wtf8) -> Wtf8Buf { + Wtf8Buf { bytes: slice.as_bytes().to_vec(), is_known_utf8: false } +} + +/// Lossily converts the string to UTF-8. +/// Returns a UTF-8 `&str` slice if the contents are well-formed in UTF-8. +/// +/// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”). +/// +/// This only copies the data if necessary (if it contains any surrogate). +pub(super) fn to_string_lossy(slice: &Wtf8) -> Cow<'_, str> { + let Some((surrogate_pos, _)) = slice.next_surrogate(0) else { + return Cow::Borrowed(unsafe { str::from_utf8_unchecked(slice.as_bytes()) }); + }; + let wtf8_bytes = slice.as_bytes(); + let mut utf8_bytes = Vec::with_capacity(slice.len()); + utf8_bytes.extend_from_slice(&wtf8_bytes[..surrogate_pos]); + utf8_bytes.extend_from_slice("\u{FFFD}".as_bytes()); + let mut pos = surrogate_pos + 3; + loop { + match slice.next_surrogate(pos) { + Some((surrogate_pos, _)) => { + utf8_bytes.extend_from_slice(&wtf8_bytes[pos..surrogate_pos]); + utf8_bytes.extend_from_slice("\u{FFFD}".as_bytes()); + pos = surrogate_pos + 3; + } + None => { + utf8_bytes.extend_from_slice(&wtf8_bytes[pos..]); + return Cow::Owned(unsafe { String::from_utf8_unchecked(utf8_bytes) }); + } + } + } +} + +#[inline] +pub(super) fn clone_into(slice: &Wtf8, buf: &mut Wtf8Buf) { + buf.is_known_utf8 = false; + slice.as_bytes().clone_into(&mut buf.bytes); +} + +#[cfg(not(test))] +impl Wtf8 { + #[rustc_allow_incoherent_impl] + pub fn to_owned(&self) -> Wtf8Buf { + to_owned(self) + } + + #[rustc_allow_incoherent_impl] + pub fn clone_into(&self, buf: &mut Wtf8Buf) { + clone_into(self, buf) + } + + #[rustc_allow_incoherent_impl] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + to_string_lossy(self) + } + + #[rustc_allow_incoherent_impl] + pub fn into_box(&self) -> Box<Wtf8> { + let boxed: Box<[u8]> = self.as_bytes().into(); + unsafe { mem::transmute(boxed) } + } + + #[rustc_allow_incoherent_impl] + pub fn empty_box() -> Box<Wtf8> { + let boxed: 
Box<[u8]> = Default::default(); + unsafe { mem::transmute(boxed) } + }

    /// Copies the bytes of this string into a new `Arc<[u8]>` and returns it as an `Arc<Wtf8>`.
    ///
    /// Despite the `into_` prefix this takes `&self`; the contents are copied, not moved.
    #[cfg(target_has_atomic = "ptr")]
    #[rustc_allow_incoherent_impl]
    pub fn into_arc(&self) -> Arc<Wtf8> {
        let arc: Arc<[u8]> = Arc::from(self.as_bytes());
        // NOTE(review): the cast reinterprets the `[u8]` allocation as `Wtf8`. This presumes
        // `Wtf8` has the same layout as `[u8]`; its definition is outside this chunk, so
        // confirm there.
        unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Wtf8) }
    }

    /// Copies the bytes of this string into a new `Rc<[u8]>` and returns it as an `Rc<Wtf8>`.
    ///
    /// Despite the `into_` prefix this takes `&self`; the contents are copied, not moved.
    #[rustc_allow_incoherent_impl]
    pub fn into_rc(&self) -> Rc<Wtf8> {
        let rc: Rc<[u8]> = Rc::from(self.as_bytes());
        // NOTE(review): same layout assumption as in `into_arc` (`Wtf8` laid out like `[u8]`);
        // confirm against the type definition.
        unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Wtf8) }
    }

    /// Returns an owned copy of this string with the byte-level ASCII lowercase mapping applied.
    ///
    /// The returned buffer is always flagged `is_known_utf8: false`, whatever its contents.
    #[inline]
    #[rustc_allow_incoherent_impl]
    pub fn to_ascii_lowercase(&self) -> Wtf8Buf {
        Wtf8Buf { bytes: self.as_bytes().to_ascii_lowercase(), is_known_utf8: false }
    }

    /// Returns an owned copy of this string with the byte-level ASCII uppercase mapping applied.
    ///
    /// The returned buffer is always flagged `is_known_utf8: false`, whatever its contents.
    #[inline]
    #[rustc_allow_incoherent_impl]
    pub fn to_ascii_uppercase(&self) -> Wtf8Buf {
        Wtf8Buf { bytes: self.as_bytes().to_ascii_uppercase(), is_known_utf8: false }
    }
}

/// Combines a UTF-16 surrogate pair into the single `char` it encodes.
///
/// Computes `0x10000 + (((lead - 0xD800) << 10) | (trail - 0xDC00))`.
///
/// Nothing in this function validates its arguments. The subtractions are performed on `u16`,
/// and the result is handed to `char::from_u32_unchecked`. For `lead` in `0xD800..=0xDBFF` and
/// `trail` in `0xDC00..=0xDFFF` the result lies in `0x10000..=0x10FFFF`; callers are presumably
/// required to guarantee those ranges (verify at the call sites).
#[inline]
fn decode_surrogate_pair(lead: u16, trail: u16) -> char {
    let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32);
    unsafe { char::from_u32_unchecked(code_point) }
}

/// Hashes the raw bytes followed by a single `0xFE` byte.
///
/// The `is_known_utf8` flag does not take part in the hash. The trailing byte presumably acts
/// as a terminator so that consecutive strings fed to one hasher stay distinguishable.
impl Hash for Wtf8Buf {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write(&self.bytes);
        0xfeu8.hash(state)
    }
}

// diff --git a/library/alloc/src/wtf8/tests.rs b/library/alloc/src/wtf8/tests.rs new file mode 100644 index 00000000000..291f63f9f9e --- /dev/null +++ b/library/alloc/src/wtf8/tests.rs @@ -0,0 +1,732 @@
//
// Everything below is the content of the new file `library/alloc/src/wtf8/tests.rs`.

use realalloc::string::ToString;

use super::*;

#[test]
fn code_point_from_u32() {
    assert!(CodePoint::from_u32(0).is_some());
    assert!(CodePoint::from_u32(0xD800).is_some());
    assert!(CodePoint::from_u32(0x10FFFF).is_some());
    assert!(CodePoint::from_u32(0x110000).is_none());
}

#[test]
fn code_point_to_u32() {
    fn c(value: u32) -> CodePoint {
        CodePoint::from_u32(value).unwrap()
    }
    assert_eq!(c(0).to_u32(), 0);
    assert_eq!(c(0xD800).to_u32(), 0xD800);
    assert_eq!(c(0x10FFFF).to_u32(), 0x10FFFF);
}

#[test]
fn code_point_to_lead_surrogate() {
    fn c(value: u32) -> CodePoint {
        CodePoint::from_u32(value).unwrap()
    }
    assert_eq!(c(0).to_lead_surrogate(), None);
    assert_eq!(c(0xE9).to_lead_surrogate(), None);
    assert_eq!(c(0xD800).to_lead_surrogate(), Some(0xD800));
    assert_eq!(c(0xDBFF).to_lead_surrogate(), Some(0xDBFF));
    assert_eq!(c(0xDC00).to_lead_surrogate(), None);
    assert_eq!(c(0xDFFF).to_lead_surrogate(), None);
    assert_eq!(c(0x1F4A9).to_lead_surrogate(), None);
    assert_eq!(c(0x10FFFF).to_lead_surrogate(), None);
}

#[test]
fn code_point_to_trail_surrogate() {
    fn c(value: u32) -> CodePoint {
        CodePoint::from_u32(value).unwrap()
    }
    assert_eq!(c(0).to_trail_surrogate(), None);
    assert_eq!(c(0xE9).to_trail_surrogate(), None);
    assert_eq!(c(0xD800).to_trail_surrogate(), None);
    assert_eq!(c(0xDBFF).to_trail_surrogate(), None);
    assert_eq!(c(0xDC00).to_trail_surrogate(), Some(0xDC00));
    assert_eq!(c(0xDFFF).to_trail_surrogate(), Some(0xDFFF));
    assert_eq!(c(0x1F4A9).to_trail_surrogate(), None);
    assert_eq!(c(0x10FFFF).to_trail_surrogate(), None);
}

#[test]
fn code_point_from_char() {
    assert_eq!(CodePoint::from_char('a').to_u32(), 0x61);
    assert_eq!(CodePoint::from_char('💩').to_u32(), 0x1F4A9);
}

#[test]
fn code_point_to_string() {
    assert_eq!(format!("{:?}", CodePoint::from_char('a')), "U+0061");
    assert_eq!(format!("{:?}", CodePoint::from_char('💩')), "U+1F4A9");
}

#[test]
fn code_point_to_char() {
    fn c(value: u32) -> CodePoint {
        CodePoint::from_u32(value).unwrap()
    }
    assert_eq!(c(0x61).to_char(), Some('a'));
    assert_eq!(c(0x1F4A9).to_char(), Some('💩'));
    assert_eq!(c(0xD800).to_char(), None);
}

#[test]
fn code_point_to_char_lossy() {
    fn c(value: u32) -> CodePoint {
        CodePoint::from_u32(value).unwrap()
    }
    assert_eq!(c(0x61).to_char_lossy(), 'a');
    assert_eq!(c(0x1F4A9).to_char_lossy(), '💩');
    assert_eq!(c(0xD800).to_char_lossy(), '\u{FFFD}');
}

#[test]
fn wtf8buf_new() {
    assert_eq!(Wtf8Buf::new().as_bytes(), b"");
}

#[test]
fn wtf8buf_from_str() {
    assert_eq!(Wtf8Buf::from_str("").as_bytes(), b"");
    assert_eq!(Wtf8Buf::from_str("aé 💩").as_bytes(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
}

#[test]
fn wtf8buf_from_string() {
    assert_eq!(Wtf8Buf::from_string(String::from("")).as_bytes(), b"");
    assert_eq!(
        Wtf8Buf::from_string(String::from("aé 💩")).as_bytes(),
        b"a\xC3\xA9 \xF0\x9F\x92\xA9"
    );
}

#[test]
fn wtf8buf_from_wide() {
    let buf = Wtf8Buf::from_wide(&[]);
    assert_eq!(buf.as_bytes(), b"");
    assert!(buf.is_known_utf8);

    let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xDCA9]);
    assert_eq!(buf.as_bytes(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
    assert!(buf.is_known_utf8);

    let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]);
    assert_eq!(buf.as_bytes(), b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9");
    assert!(!buf.is_known_utf8);

    let buf = Wtf8Buf::from_wide(&[0xD800]);
    assert_eq!(buf.as_bytes(), b"\xED\xA0\x80");
    assert!(!buf.is_known_utf8);

    let buf = Wtf8Buf::from_wide(&[0xDBFF]);
    assert_eq!(buf.as_bytes(), b"\xED\xAF\xBF");
    assert!(!buf.is_known_utf8);

    let buf = Wtf8Buf::from_wide(&[0xDC00]);
    assert_eq!(buf.as_bytes(), b"\xED\xB0\x80");
    assert!(!buf.is_known_utf8);

    let buf = Wtf8Buf::from_wide(&[0xDFFF]);
    assert_eq!(buf.as_bytes(), b"\xED\xBF\xBF");
    assert!(!buf.is_known_utf8);
}

#[test]
fn wtf8buf_push_str() {
    let mut string = Wtf8Buf::new();
    assert_eq!(string.as_bytes(), b"");
    assert!(string.is_known_utf8);

    string.push_str("aé 💩");
    assert_eq!(string.as_bytes(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
    assert!(string.is_known_utf8);
}

#[test]
fn wtf8buf_push_char() {
    let mut string = Wtf8Buf::from_str("aé ");
    assert_eq!(string.as_bytes(), b"a\xC3\xA9 ");
    assert!(string.is_known_utf8);

    string.push_char('💩');
    assert_eq!(string.as_bytes(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
    assert!(string.is_known_utf8);
}

#[test]
fn wtf8buf_push() {
    let mut string = Wtf8Buf::from_str("aé ");
    assert_eq!(string.as_bytes(), b"a\xC3\xA9 ");
    assert!(string.is_known_utf8);

    string.push(CodePoint::from_char('💩'));
    assert_eq!(string.as_bytes(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
    assert!(string.is_known_utf8);

    fn c(value: u32) -> CodePoint {
        CodePoint::from_u32(value).unwrap()
    }

    let mut string = Wtf8Buf::new();
    string.push(c(0xD83D)); // lead
    assert!(!string.is_known_utf8);
    string.push(c(0xDCA9)); // trail
    assert_eq!(string.as_bytes(), b"\xF0\x9F\x92\xA9"); // Magic!

    let mut string = Wtf8Buf::new();
    string.push(c(0xD83D)); // lead
    assert!(!string.is_known_utf8);
    string.push(c(0x20)); // not surrogate
    string.push(c(0xDCA9)); // trail
    assert_eq!(string.as_bytes(), b"\xED\xA0\xBD \xED\xB2\xA9");

    let mut string = Wtf8Buf::new();
    string.push(c(0xD800)); // lead
    assert!(!string.is_known_utf8);
    string.push(c(0xDBFF)); // lead
    assert_eq!(string.as_bytes(), b"\xED\xA0\x80\xED\xAF\xBF");

    let mut string = Wtf8Buf::new();
    string.push(c(0xD800)); // lead
    assert!(!string.is_known_utf8);
    string.push(c(0xE000)); // not surrogate
    assert_eq!(string.as_bytes(), b"\xED\xA0\x80\xEE\x80\x80");

    let mut string = Wtf8Buf::new();
    string.push(c(0xD7FF)); // not surrogate
    assert!(string.is_known_utf8);
    string.push(c(0xDC00)); // trail
    assert!(!string.is_known_utf8);
    assert_eq!(string.as_bytes(), b"\xED\x9F\xBF\xED\xB0\x80");

    let mut string = Wtf8Buf::new();
    string.push(c(0x61)); // not surrogate, < 3 bytes
    assert!(string.is_known_utf8);
    string.push(c(0xDC00)); // trail
    assert!(!string.is_known_utf8);
    assert_eq!(string.as_bytes(), b"\x61\xED\xB0\x80");

    let mut string = Wtf8Buf::new();
    string.push(c(0xDC00)); // trail
    assert!(!string.is_known_utf8);
    assert_eq!(string.as_bytes(), b"\xED\xB0\x80");
}

#[test]
fn wtf8buf_push_wtf8() {
    let mut string = Wtf8Buf::from_str("aé");
    assert_eq!(string.as_bytes(), b"a\xC3\xA9");
    string.push_wtf8(Wtf8::from_str(" 💩"));
    assert_eq!(string.as_bytes(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
    assert!(string.is_known_utf8);

    fn w(v: &[u8]) -> &Wtf8 {
        unsafe { Wtf8::from_bytes_unchecked(v) }
    }

    let mut string = Wtf8Buf::new();
    string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
    string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
    assert_eq!(string.as_bytes(), b"\xF0\x9F\x92\xA9"); // Magic!

    let mut string = Wtf8Buf::new();
    string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
    string.push_wtf8(w(b" ")); // not surrogate
    string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
    assert_eq!(string.as_bytes(), b"\xED\xA0\xBD \xED\xB2\xA9");
    assert!(!string.is_known_utf8);

    let mut string = Wtf8Buf::new();
    string.push_wtf8(w(b"\xED\xA0\x80")); // lead
    string.push_wtf8(w(b"\xED\xAF\xBF")); // lead
    assert_eq!(string.as_bytes(), b"\xED\xA0\x80\xED\xAF\xBF");
    assert!(!string.is_known_utf8);

    let mut string = Wtf8Buf::new();
    string.push_wtf8(w(b"\xED\xA0\x80")); // lead
    string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate
    assert_eq!(string.as_bytes(), b"\xED\xA0\x80\xEE\x80\x80");
    assert!(!string.is_known_utf8);

    let mut string = Wtf8Buf::new();
    string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate
    string.push_wtf8(w(b"\xED\xB0\x80")); // trail
    assert_eq!(string.as_bytes(), b"\xED\x9F\xBF\xED\xB0\x80");
    assert!(!string.is_known_utf8);

    let mut string = Wtf8Buf::new();
    string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes
    string.push_wtf8(w(b"\xED\xB0\x80")); // trail
    assert_eq!(string.as_bytes(), b"\x61\xED\xB0\x80");
    assert!(!string.is_known_utf8);

    let mut string = Wtf8Buf::new();
    string.push_wtf8(w(b"\xED\xB0\x80")); // trail
    assert_eq!(string.as_bytes(), b"\xED\xB0\x80");
    assert!(!string.is_known_utf8);
}

#[test]
fn wtf8buf_truncate() {
    let mut string = Wtf8Buf::from_str("aé");
    assert!(string.is_known_utf8);

    string.truncate(3);
    assert_eq!(string.as_bytes(), b"a\xC3\xA9");
    assert!(string.is_known_utf8);

    string.truncate(1);
    assert_eq!(string.as_bytes(), b"a");
    assert!(string.is_known_utf8);

    string.truncate(0);
    assert_eq!(string.as_bytes(), b"");
    assert!(string.is_known_utf8);
}

#[test]
fn wtf8buf_truncate_around_non_bmp() {
    let mut string = Wtf8Buf::from_str("💩");
    assert!(string.is_known_utf8);

    string.truncate(4);
    assert_eq!(string.as_bytes(), b"\xF0\x9F\x92\xA9");
    assert!(string.is_known_utf8);

    string.truncate(0);
    assert_eq!(string.as_bytes(), b"");
    assert!(string.is_known_utf8);
}

#[test]
#[should_panic]
fn wtf8buf_truncate_fail_code_point_boundary() {
    let mut string = Wtf8Buf::from_str("aé");
    string.truncate(2);
}

#[test]
#[should_panic]
fn wtf8buf_truncate_fail_longer() {
    let mut string = Wtf8Buf::from_str("aé");
    string.truncate(4);
}

#[test]
#[should_panic]
fn wtf8buf_truncate_splitting_non_bmp3() {
    let mut string = Wtf8Buf::from_str("💩");
    assert!(string.is_known_utf8);
    string.truncate(3);
}

#[test]
#[should_panic]
fn wtf8buf_truncate_splitting_non_bmp2() {
    let mut string = Wtf8Buf::from_str("💩");
    assert!(string.is_known_utf8);
    string.truncate(2);
}

#[test]
#[should_panic]
fn wtf8buf_truncate_splitting_non_bmp1() {
    let mut string = Wtf8Buf::from_str("💩");
    assert!(string.is_known_utf8);
    string.truncate(1);
}

#[test]
fn wtf8buf_into_string() {
    let mut string = Wtf8Buf::from_str("aé 💩");
    assert!(string.is_known_utf8);
    assert_eq!(string.clone().into_string(), Ok(String::from("aé 💩")));
    string.push(CodePoint::from_u32(0xD800).unwrap());
    assert!(!string.is_known_utf8);
    assert_eq!(string.clone().into_string(), Err(string));
}

#[test]
fn wtf8buf_into_string_lossy() {
    let mut string = Wtf8Buf::from_str("aé 💩");
    assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩"));
    string.push(CodePoint::from_u32(0xD800).unwrap());
    assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩�"));
}

#[test]
fn wtf8buf_from_iterator() {
    fn f(values: &[u32]) -> Wtf8Buf {
        values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>()
    }
    assert_eq!(
        f(&[0x61, 0xE9, 0x20, 0x1F4A9]),
        Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true }
    );

    assert_eq!(f(&[0xD83D, 0xDCA9]).as_bytes(), b"\xF0\x9F\x92\xA9"); // Magic!
    assert_eq!(
        f(&[0xD83D, 0x20, 0xDCA9]),
        Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false }
    );
    assert_eq!(
        f(&[0xD800, 0xDBFF]),
        Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false }
    );
    assert_eq!(
        f(&[0xD800, 0xE000]),
        Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false }
    );
    assert_eq!(
        f(&[0xD7FF, 0xDC00]),
        Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false }
    );
    assert_eq!(
        f(&[0x61, 0xDC00]),
        Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false }
    );
    assert_eq!(f(&[0xDC00]), Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false });
}

#[test]
fn wtf8buf_extend() {
    fn e(initial: &[u32], extended: &[u32]) -> Wtf8Buf {
        fn c(value: &u32) -> CodePoint {
            CodePoint::from_u32(*value).unwrap()
        }
        let mut string = initial.iter().map(c).collect::<Wtf8Buf>();
        string.extend(extended.iter().map(c));
        string
    }

    assert_eq!(
        e(&[0x61, 0xE9], &[0x20, 0x1F4A9]),
        Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true }
    );

    assert_eq!(e(&[0xD83D], &[0xDCA9]).as_bytes(), b"\xF0\x9F\x92\xA9"); // Magic!
    assert_eq!(
        e(&[0xD83D, 0x20], &[0xDCA9]),
        Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false }
    );
    assert_eq!(
        e(&[0xD800], &[0xDBFF]),
        Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false }
    );
    assert_eq!(
        e(&[0xD800], &[0xE000]),
        Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false }
    );
    assert_eq!(
        e(&[0xD7FF], &[0xDC00]),
        Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false }
    );
    assert_eq!(
        e(&[0x61], &[0xDC00]),
        Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false }
    );
    assert_eq!(
        e(&[], &[0xDC00]),
        Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false }
    );
}

#[test]
fn wtf8buf_show() {
    let mut string = Wtf8Buf::from_str("a\té \u{7f}💩\r");
    string.push(CodePoint::from_u32(0xD800).unwrap());
    assert_eq!(format!("{string:?}"), "\"a\\té \\u{7f}\u{1f4a9}\\r\\u{d800}\"");
}

#[test]
fn wtf8buf_as_slice() {
    assert_eq!(Wtf8Buf::from_str("aé").as_slice(), Wtf8::from_str("aé"));
}

#[test]
fn wtf8buf_show_str() {
    let text = "a\té 💩\r";
    let string = Wtf8Buf::from_str(text);
    assert_eq!(format!("{text:?}"), format!("{string:?}"));
}

#[test]
fn wtf8_from_str() {
    assert_eq!(&Wtf8::from_str("").as_bytes(), b"");
    assert_eq!(&Wtf8::from_str("aé 💩").as_bytes(), b"a\xC3\xA9 \xF0\x9F\x92\xA9");
}

#[test]
fn wtf8_len() {
    assert_eq!(Wtf8::from_str("").len(), 0);
    assert_eq!(Wtf8::from_str("aé 💩").len(), 8);
}

#[test]
fn wtf8_slice() {
    assert_eq!(&Wtf8::from_str("aé 💩")[1..4].as_bytes(), b"\xC3\xA9 ");
}

#[test]
#[should_panic]
fn wtf8_slice_not_code_point_boundary() {
    let _ = &Wtf8::from_str("aé 💩")[2..4];
}

#[test]
fn wtf8_slice_from() {
    assert_eq!(&Wtf8::from_str("aé 💩")[1..].as_bytes(), b"\xC3\xA9 \xF0\x9F\x92\xA9");
}

#[test]
#[should_panic]
fn wtf8_slice_from_not_code_point_boundary() {
    let _ = &Wtf8::from_str("aé 💩")[2..];
}

#[test]
fn wtf8_slice_to() {
    assert_eq!(&Wtf8::from_str("aé 💩")[..4].as_bytes(), b"a\xC3\xA9 ");
}

#[test]
#[should_panic]
fn wtf8_slice_to_not_code_point_boundary() {
    // Byte 5 lies inside the 4-byte encoding of '💩' (bytes 4..8). The range has to be `..5`
    // so that this test exercises slice-to; `5..` would merely repeat the slice-from case.
    let _ = &Wtf8::from_str("aé 💩")[..5];
}

#[test]
fn wtf8_ascii_byte_at() {
    let slice = Wtf8::from_str("aé 💩");
    assert_eq!(slice.ascii_byte_at(0), b'a');
    assert_eq!(slice.ascii_byte_at(1), b'\xFF');
    assert_eq!(slice.ascii_byte_at(2), b'\xFF');
    assert_eq!(slice.ascii_byte_at(3), b' ');
    assert_eq!(slice.ascii_byte_at(4), b'\xFF');
}

#[test]
fn wtf8_code_points() {
    fn c(value: u32) -> CodePoint {
        CodePoint::from_u32(value).unwrap()
    }
    fn cp(string: &Wtf8Buf) -> Vec<Option<char>> {
        string.code_points().map(|c| c.to_char()).collect::<Vec<_>>()
    }
    let mut string = Wtf8Buf::from_str("é ");
    assert_eq!(cp(&string), [Some('é'), Some(' ')]);
    string.push(c(0xD83D));
    assert_eq!(cp(&string), [Some('é'), Some(' '), None]);
    string.push(c(0xDCA9));
    assert_eq!(cp(&string), [Some('é'), Some(' '), Some('💩')]);
}

#[test]
fn wtf8_as_str() {
    assert_eq!(Wtf8::from_str("").as_str(), Ok(""));
    assert_eq!(Wtf8::from_str("aé 💩").as_str(), Ok("aé 💩"));
    let mut string = Wtf8Buf::new();
    string.push(CodePoint::from_u32(0xD800).unwrap());
    assert!(string.as_str().is_err());
}

#[test]
fn wtf8_to_string_lossy() {
    assert_eq!(to_string_lossy(Wtf8::from_str("")), Cow::Borrowed(""));
    assert_eq!(to_string_lossy(Wtf8::from_str("aé 💩")), Cow::Borrowed("aé 💩"));
    let mut string = Wtf8Buf::from_str("aé 💩");
    string.push(CodePoint::from_u32(0xD800).unwrap());
    let expected: Cow<'_, str> = Cow::Owned(String::from("aé 💩�"));
    assert_eq!(to_string_lossy(&string), expected);
}

#[test]
fn wtf8_display() {
    fn d(b: &[u8]) -> String {
        (&unsafe { Wtf8::from_bytes_unchecked(b) }).to_string()
    }

    assert_eq!("", d("".as_bytes()));
    assert_eq!("aé 💩", d("aé 💩".as_bytes()));

    let mut string = Wtf8Buf::from_str("aé 💩");
    string.push(CodePoint::from_u32(0xD800).unwrap());
    assert_eq!("aé 💩�", d(string.as_ref()));
}

#[test]
fn wtf8_encode_wide() {
    let mut string = Wtf8Buf::from_str("aé ");
    string.push(CodePoint::from_u32(0xD83D).unwrap());
    string.push_char('💩');
    assert_eq!(
        string.encode_wide().collect::<Vec<_>>(),
        vec![0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]
    );
}

#[test]
fn wtf8_encode_wide_size_hint() {
    let string = Wtf8Buf::from_str("\u{12345}");
    let mut iter = string.encode_wide();
    assert_eq!((1, Some(8)), iter.size_hint());
    iter.next().unwrap();
    assert_eq!((1, Some(1)), iter.size_hint());
    iter.next().unwrap();
    assert_eq!((0, Some(0)), iter.size_hint());
    assert!(iter.next().is_none());
}

#[test]
fn wtf8_clone_into() {
    let mut string = Wtf8Buf::new();
    clone_into(Wtf8::from_str("green"), &mut string);
    assert_eq!(string.as_bytes(), b"green");

    let mut string = Wtf8Buf::from_str("green");
    clone_into(Wtf8::from_str(""), &mut string);
    assert_eq!(string.as_bytes(), b"");

    let mut string = Wtf8Buf::from_str("red");
    clone_into(Wtf8::from_str("green"), &mut string);
    assert_eq!(string.as_bytes(), b"green");

    let mut string = Wtf8Buf::from_str("green");
    clone_into(Wtf8::from_str("red"), &mut string);
    assert_eq!(string.as_bytes(), b"red");

    let mut string = Wtf8Buf::from_str("green");
    assert!(string.is_known_utf8);
    clone_into(unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80") }, &mut string);
    assert_eq!(string.as_bytes(), b"\xED\xA0\x80");
    assert!(!string.is_known_utf8);
}

#[test]
fn wtf8_make_ascii_lowercase() {
    let mut lowercase = Wtf8Buf::from_str("");
    lowercase.make_ascii_lowercase();
    assert_eq!(lowercase.as_bytes(), b"");

    let mut lowercase = Wtf8Buf::from_str("GrEeN gRaPeS! 🍇");
    lowercase.make_ascii_lowercase();
    assert_eq!(lowercase.as_bytes(), b"green grapes! \xf0\x9f\x8d\x87");

    let mut lowercase = to_owned(unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80") });
    lowercase.make_ascii_lowercase();
    assert_eq!(lowercase.as_bytes(), b"\xED\xA0\x80");
    assert!(!lowercase.is_known_utf8);
}

#[test]
fn wtf8_make_ascii_uppercase() {
    let mut uppercase = Wtf8Buf::from_str("");
    uppercase.make_ascii_uppercase();
    assert_eq!(uppercase.as_bytes(), b"");

    let mut uppercase = Wtf8Buf::from_str("GrEeN gRaPeS! 🍇");
    uppercase.make_ascii_uppercase();
    assert_eq!(uppercase.as_bytes(), b"GREEN GRAPES! \xf0\x9f\x8d\x87");

    let mut uppercase = to_owned(unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80") });
    uppercase.make_ascii_uppercase();
    assert_eq!(uppercase.as_bytes(), b"\xED\xA0\x80");
    assert!(!uppercase.is_known_utf8);
}

#[test]
fn wtf8_to_owned() {
    let string = to_owned(unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80") });
    assert_eq!(string.as_bytes(), b"\xED\xA0\x80");
    assert!(!string.is_known_utf8);
}

#[test]
fn wtf8_valid_utf8_boundaries() {
    let mut string = Wtf8Buf::from_str("aé 💩");
    string.push(CodePoint::from_u32(0xD800).unwrap());
    string.push(CodePoint::from_u32(0xD800).unwrap());
    string.check_utf8_boundary(0);
    string.check_utf8_boundary(1);
    string.check_utf8_boundary(3);
    string.check_utf8_boundary(4);
    string.check_utf8_boundary(8);
    string.check_utf8_boundary(14);
    assert_eq!(string.len(), 14);

    string.push_char('a');
    string.check_utf8_boundary(14);
    string.check_utf8_boundary(15);

    let mut string = Wtf8Buf::from_str("a");
    string.push(CodePoint::from_u32(0xD800).unwrap());
    string.check_utf8_boundary(1);

    let mut string = Wtf8Buf::from_str("\u{D7FF}");
    string.push(CodePoint::from_u32(0xD800).unwrap());
    string.check_utf8_boundary(3);

    let mut string = Wtf8Buf::new();
    string.push(CodePoint::from_u32(0xD800).unwrap());
    string.push_char('\u{D7FF}');
    string.check_utf8_boundary(3);
}

#[test]
#[should_panic(expected = "byte index 4 is out of bounds")]
fn wtf8_utf8_boundary_out_of_bounds() {
    let string = Wtf8::from_str("aé");
    string.check_utf8_boundary(4);
}

#[test]
#[should_panic(expected = "byte index 1 is not a codepoint boundary")]
fn wtf8_utf8_boundary_inside_codepoint() {
    let string = Wtf8::from_str("é");
    string.check_utf8_boundary(1);
}

#[test]
#[should_panic(expected = "byte index 1 is not a codepoint boundary")]
fn wtf8_utf8_boundary_inside_surrogate() {
    let mut string = Wtf8Buf::new();
    string.push(CodePoint::from_u32(0xD800).unwrap());
    string.check_utf8_boundary(1);
}

#[test]
#[should_panic(expected = "byte index 3 lies between surrogate codepoints")]
fn wtf8_utf8_boundary_between_surrogates() {
    let mut string = Wtf8Buf::new();
    string.push(CodePoint::from_u32(0xD800).unwrap());
    string.push(CodePoint::from_u32(0xD800).unwrap());
    string.check_utf8_boundary(3);
}

#[test]
fn wobbled_wtf8_plus_bytes_isnt_utf8() {
    let mut string: Wtf8Buf = to_owned(unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80") });
    assert!(!string.is_known_utf8);
    unsafe {
        string.extend_from_slice_unchecked(b"some utf-8");
    }
    assert!(!string.is_known_utf8);
}

#[test]
fn wobbled_wtf8_plus_str_isnt_utf8() {
    let mut string: Wtf8Buf = to_owned(unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80") });
    assert!(!string.is_known_utf8);
    string.push_str("some utf-8");
    assert!(!string.is_known_utf8);
}

#[test]
fn unwobbly_wtf8_plus_utf8_is_utf8() {
    let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world");
    assert!(string.is_known_utf8);
    string.push_str("some utf-8");
    assert!(string.is_known_utf8);
}
