diff options
Diffstat (limited to 'library/std/src')
| -rw-r--r-- | library/std/src/collections/hash/map.rs | 11 | ||||
| -rw-r--r-- | library/std/src/error.rs | 307 | ||||
| -rw-r--r-- | library/std/src/io/error.rs | 9 | ||||
| -rw-r--r-- | library/std/src/lib.rs | 5 | ||||
| -rw-r--r-- | library/std/src/macros.rs | 24 | ||||
| -rw-r--r-- | library/std/src/sys/windows/os_str.rs | 4 | ||||
| -rw-r--r-- | library/std/src/sys_common/wtf8.rs | 92 | ||||
| -rw-r--r-- | library/std/src/sys_common/wtf8/tests.rs | 295 |
8 files changed, 481 insertions, 266 deletions
diff --git a/library/std/src/collections/hash/map.rs b/library/std/src/collections/hash/map.rs index db811343fa3..9845d1faf9a 100644 --- a/library/std/src/collections/hash/map.rs +++ b/library/std/src/collections/hash/map.rs @@ -9,6 +9,8 @@ use crate::borrow::Borrow; use crate::cell::Cell; use crate::collections::TryReserveError; use crate::collections::TryReserveErrorKind; +#[cfg(not(bootstrap))] +use crate::error::Error; use crate::fmt::{self, Debug}; #[allow(deprecated)] use crate::hash::{BuildHasher, Hash, Hasher, SipHasher13}; @@ -2158,6 +2160,15 @@ impl<'a, K: Debug, V: Debug> fmt::Display for OccupiedError<'a, K, V> { } } +#[cfg(not(bootstrap))] +#[unstable(feature = "map_try_insert", issue = "82766")] +impl<'a, K: fmt::Debug, V: fmt::Debug> Error for OccupiedError<'a, K, V> { + #[allow(deprecated)] + fn description(&self) -> &str { + "key already exists" + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V, S> IntoIterator for &'a HashMap<K, V, S> { type Item = (&'a K, &'a V); diff --git a/library/std/src/error.rs b/library/std/src/error.rs index 4fbcfd85d7c..914f6d6d2e3 100644 --- a/library/std/src/error.rs +++ b/library/std/src/error.rs @@ -1,141 +1,4 @@ -//! The `Error` trait provides common functionality for errors. -//! -//! # Error Handling In Rust -//! -//! The Rust language provides two complementary systems for constructing / -//! representing, reporting, propagating, reacting to, and discarding errors. -//! These responsibilities are collectively known as "error handling." The -//! components of the first system, the panic runtime and interfaces, are most -//! commonly used to represent bugs that have been detected in your program. The -//! components of the second system, `Result`, the error traits, and user -//! defined types, are used to represent anticipated runtime failure modes of -//! your program. -//! -//! ## The Panic Interfaces -//! -//! The following are the primary interfaces of the panic system and the -//! 
responsibilities they cover: -//! -//! * [`panic!`] and [`panic_any`] (Constructing, Propagated automatically) -//! * [`PanicInfo`] (Reporting) -//! * [`set_hook`], [`take_hook`], and [`#[panic_handler]`][panic-handler] (Reporting) -//! * [`catch_unwind`] and [`resume_unwind`] (Discarding, Propagating) -//! -//! The following are the primary interfaces of the error system and the -//! responsibilities they cover: -//! -//! * [`Result`] (Propagating, Reacting) -//! * The [`Error`] trait (Reporting) -//! * User defined types (Constructing / Representing) -//! * [`match`] and [`downcast`] (Reacting) -//! * The question mark operator ([`?`]) (Propagating) -//! * The partially stable [`Try`] traits (Propagating, Constructing) -//! * [`Termination`] (Reporting) -//! -//! ## Converting Errors into Panics -//! -//! The panic and error systems are not entirely distinct. Often times errors -//! that are anticipated runtime failures in an API might instead represent bugs -//! to a caller. For these situations the standard library provides APIs for -//! constructing panics with an `Error` as it's source. -//! -//! * [`Result::unwrap`] -//! * [`Result::expect`] -//! -//! These functions are equivalent, they either return the inner value if the -//! `Result` is `Ok` or panic if the `Result` is `Err` printing the inner error -//! as the source. The only difference between them is that with `expect` you -//! provide a panic error message to be printed alongside the source, whereas -//! `unwrap` has a default message indicating only that you unwraped an `Err`. -//! -//! Of the two, `expect` is generally preferred since its `msg` field allows you -//! to convey your intent and assumptions which makes tracking down the source -//! of a panic easier. `unwrap` on the other hand can still be a good fit in -//! situations where you can trivially show that a piece of code will never -//! panic, such as `"127.0.0.1".parse::<std::net::IpAddr>().unwrap()` or early -//! prototyping. -//! -//! 
# Common Message Styles -//! -//! There are two common styles for how people word `expect` messages. Using -//! the message to present information to users encountering a panic -//! ("expect as error message") or using the message to present information -//! to developers debugging the panic ("expect as precondition"). -//! -//! In the former case the expect message is used to describe the error that -//! has occurred which is considered a bug. Consider the following example: -//! -//! ```should_panic -//! // Read environment variable, panic if it is not present -//! let path = std::env::var("IMPORTANT_PATH").unwrap(); -//! ``` -//! -//! In the "expect as error message" style we would use expect to describe -//! that the environment variable was not set when it should have been: -//! -//! ```should_panic -//! let path = std::env::var("IMPORTANT_PATH") -//! .expect("env variable `IMPORTANT_PATH` is not set"); -//! ``` -//! -//! In the "expect as precondition" style, we would instead describe the -//! reason we _expect_ the `Result` should be `Ok`. With this style we would -//! prefer to write: -//! -//! ```should_panic -//! let path = std::env::var("IMPORTANT_PATH") -//! .expect("env variable `IMPORTANT_PATH` should be set by `wrapper_script.sh`"); -//! ``` -//! -//! The "expect as error message" style does not work as well with the -//! default output of the std panic hooks, and often ends up repeating -//! information that is already communicated by the source error being -//! unwrapped: -//! -//! ```text -//! thread 'main' panicked at 'env variable `IMPORTANT_PATH` is not set: NotPresent', src/main.rs:4:6 -//! ``` -//! -//! In this example we end up mentioning that an env variable is not set, -//! followed by our source message that says the env is not present, the -//! only additional information we're communicating is the name of the -//! environment variable being checked. -//! -//! The "expect as precondition" style instead focuses on source code -//! 
readability, making it easier to understand what must have gone wrong in -//! situations where panics are being used to represent bugs exclusively. -//! Also, by framing our expect in terms of what "SHOULD" have happened to -//! prevent the source error, we end up introducing new information that is -//! independent from our source error. -//! -//! ```text -//! thread 'main' panicked at 'env variable `IMPORTANT_PATH` should be set by `wrapper_script.sh`: NotPresent', src/main.rs:4:6 -//! ``` -//! -//! In this example we are communicating not only the name of the -//! environment variable that should have been set, but also an explanation -//! for why it should have been set, and we let the source error display as -//! a clear contradiction to our expectation. -//! -//! **Hint**: If you're having trouble remembering how to phrase -//! expect-as-precondition style error messages remember to focus on the word -//! "should" as in "env variable should be set by blah" or "the given binary -//! should be available and executable by the current user". -//! -//! [`panic_any`]: crate::panic::panic_any -//! [`PanicInfo`]: crate::panic::PanicInfo -//! [`catch_unwind`]: crate::panic::catch_unwind -//! [`resume_unwind`]: crate::panic::resume_unwind -//! [`downcast`]: crate::error::Error -//! [`Termination`]: crate::process::Termination -//! [`Try`]: crate::ops::Try -//! [panic hook]: crate::panic::set_hook -//! [`set_hook`]: crate::panic::set_hook -//! [`take_hook`]: crate::panic::take_hook -//! [panic-handler]: <https://doc.rust-lang.org/nomicon/panic-handler.html> -//! [`match`]: ../../std/keyword.match.html -//! 
[`?`]: ../../std/result/index.html#the-question-mark-operator- - +#![doc = include_str!("../../core/src/error.md")] #![stable(feature = "rust1", since = "1.0.0")] // A note about crates and the facade: @@ -152,24 +15,48 @@ #[cfg(test)] mod tests; +#[cfg(bootstrap)] use core::array; +#[cfg(bootstrap)] use core::convert::Infallible; +#[cfg(bootstrap)] use crate::alloc::{AllocError, LayoutError}; -use crate::any::{Demand, Provider, TypeId}; +#[cfg(bootstrap)] +use crate::any::Demand; +#[cfg(bootstrap)] +use crate::any::{Provider, TypeId}; use crate::backtrace::Backtrace; +#[cfg(bootstrap)] use crate::borrow::Cow; +#[cfg(bootstrap)] use crate::cell; +#[cfg(bootstrap)] use crate::char; -use crate::fmt::{self, Debug, Display, Write}; +#[cfg(bootstrap)] +use crate::fmt::Debug; +#[cfg(bootstrap)] +use crate::fmt::Display; +use crate::fmt::{self, Write}; +#[cfg(bootstrap)] use crate::io; +#[cfg(bootstrap)] use crate::mem::transmute; +#[cfg(bootstrap)] use crate::num; +#[cfg(bootstrap)] use crate::str; +#[cfg(bootstrap)] use crate::string; +#[cfg(bootstrap)] use crate::sync::Arc; +#[cfg(bootstrap)] use crate::time; +#[cfg(not(bootstrap))] +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::error::Error; + /// `Error` is a trait representing the basic expectations for error values, /// i.e., values of type `E` in [`Result<T, E>`]. /// @@ -190,6 +77,7 @@ use crate::time; /// implementation for debugging via `source` chains. #[stable(feature = "rust1", since = "1.0.0")] #[cfg_attr(not(test), rustc_diagnostic_item = "Error")] +#[cfg(bootstrap)] pub trait Error: Debug + Display { /// The lower-level source of this error, if any. 
/// @@ -355,6 +243,7 @@ pub trait Error: Debug + Display { fn provide<'a>(&'a self, req: &mut Demand<'a>) {} } +#[cfg(bootstrap)] #[unstable(feature = "error_generic_member_access", issue = "99301")] impl<'b> Provider for dyn Error + 'b { fn provide<'a>(&'a self, req: &mut Demand<'a>) { @@ -370,6 +259,7 @@ mod private { pub struct Internal; } +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl<'a, E: Error + 'a> From<E> for Box<dyn Error + 'a> { /// Converts a type of [`Error`] into a box of dyn [`Error`]. @@ -402,6 +292,7 @@ impl<'a, E: Error + 'a> From<E> for Box<dyn Error + 'a> { } } +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl<'a, E: Error + Send + Sync + 'a> From<E> for Box<dyn Error + Send + Sync + 'a> { /// Converts a type of [`Error`] + [`Send`] + [`Sync`] into a box of @@ -440,6 +331,7 @@ impl<'a, E: Error + Send + Sync + 'a> From<E> for Box<dyn Error + Send + Sync + } } +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl From<String> for Box<dyn Error + Send + Sync> { /// Converts a [`String`] into a box of dyn [`Error`] + [`Send`] + [`Sync`]. @@ -483,6 +375,7 @@ impl From<String> for Box<dyn Error + Send + Sync> { } } +#[cfg(bootstrap)] #[stable(feature = "string_box_error", since = "1.6.0")] impl From<String> for Box<dyn Error> { /// Converts a [`String`] into a box of dyn [`Error`]. @@ -504,6 +397,7 @@ impl From<String> for Box<dyn Error> { } } +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl<'a> From<&str> for Box<dyn Error + Send + Sync + 'a> { /// Converts a [`str`] into a box of dyn [`Error`] + [`Send`] + [`Sync`]. @@ -527,6 +421,7 @@ impl<'a> From<&str> for Box<dyn Error + Send + Sync + 'a> { } } +#[cfg(bootstrap)] #[stable(feature = "string_box_error", since = "1.6.0")] impl From<&str> for Box<dyn Error> { /// Converts a [`str`] into a box of dyn [`Error`]. 
@@ -548,6 +443,7 @@ impl From<&str> for Box<dyn Error> { } } +#[cfg(bootstrap)] #[stable(feature = "cow_box_error", since = "1.22.0")] impl<'a, 'b> From<Cow<'b, str>> for Box<dyn Error + Send + Sync + 'a> { /// Converts a [`Cow`] into a box of dyn [`Error`] + [`Send`] + [`Sync`]. @@ -569,6 +465,7 @@ impl<'a, 'b> From<Cow<'b, str>> for Box<dyn Error + Send + Sync + 'a> { } } +#[cfg(bootstrap)] #[stable(feature = "cow_box_error", since = "1.22.0")] impl<'a> From<Cow<'a, str>> for Box<dyn Error> { /// Converts a [`Cow`] into a box of dyn [`Error`]. @@ -589,9 +486,11 @@ impl<'a> From<Cow<'a, str>> for Box<dyn Error> { } } +#[cfg(bootstrap)] #[unstable(feature = "never_type", issue = "35121")] impl Error for ! {} +#[cfg(bootstrap)] #[unstable( feature = "allocator_api", reason = "the precise API and guarantees it provides may be tweaked.", @@ -599,9 +498,11 @@ impl Error for ! {} )] impl Error for AllocError {} +#[cfg(bootstrap)] #[stable(feature = "alloc_layout", since = "1.28.0")] impl Error for LayoutError {} +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl Error for str::ParseBoolError { #[allow(deprecated)] @@ -610,6 +511,7 @@ impl Error for str::ParseBoolError { } } +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl Error for str::Utf8Error { #[allow(deprecated)] @@ -618,6 +520,7 @@ impl Error for str::Utf8Error { } } +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl Error for num::ParseIntError { #[allow(deprecated)] @@ -626,6 +529,7 @@ impl Error for num::ParseIntError { } } +#[cfg(bootstrap)] #[stable(feature = "try_from", since = "1.34.0")] impl Error for num::TryFromIntError { #[allow(deprecated)] @@ -634,6 +538,7 @@ impl Error for num::TryFromIntError { } } +#[cfg(bootstrap)] #[stable(feature = "try_from", since = "1.34.0")] impl Error for array::TryFromSliceError { #[allow(deprecated)] @@ -642,6 +547,7 @@ impl Error for array::TryFromSliceError { } } +#[cfg(bootstrap)] #[stable(feature = "rust1", 
since = "1.0.0")] impl Error for num::ParseFloatError { #[allow(deprecated)] @@ -650,6 +556,7 @@ impl Error for num::ParseFloatError { } } +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl Error for string::FromUtf8Error { #[allow(deprecated)] @@ -658,6 +565,7 @@ impl Error for string::FromUtf8Error { } } +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl Error for string::FromUtf16Error { #[allow(deprecated)] @@ -666,6 +574,7 @@ impl Error for string::FromUtf16Error { } } +#[cfg(bootstrap)] #[stable(feature = "str_parse_error2", since = "1.8.0")] impl Error for Infallible { fn description(&self) -> &str { @@ -673,6 +582,7 @@ impl Error for Infallible { } } +#[cfg(bootstrap)] #[stable(feature = "decode_utf16", since = "1.9.0")] impl Error for char::DecodeUtf16Error { #[allow(deprecated)] @@ -681,9 +591,11 @@ impl Error for char::DecodeUtf16Error { } } +#[cfg(bootstrap)] #[stable(feature = "u8_from_char", since = "1.59.0")] impl Error for char::TryFromCharError {} +#[cfg(bootstrap)] #[unstable(feature = "map_try_insert", issue = "82766")] impl<'a, K: Debug + Ord, V: Debug> Error for crate::collections::btree_map::OccupiedError<'a, K, V> @@ -694,6 +606,7 @@ impl<'a, K: Debug + Ord, V: Debug> Error } } +#[cfg(bootstrap)] #[unstable(feature = "map_try_insert", issue = "82766")] impl<'a, K: Debug, V: Debug> Error for crate::collections::hash_map::OccupiedError<'a, K, V> { #[allow(deprecated)] @@ -702,6 +615,7 @@ impl<'a, K: Debug, V: Debug> Error for crate::collections::hash_map::OccupiedErr } } +#[cfg(bootstrap)] #[stable(feature = "box_error", since = "1.8.0")] impl<T: Error> Error for Box<T> { #[allow(deprecated, deprecated_in_future)] @@ -719,6 +633,7 @@ impl<T: Error> Error for Box<T> { } } +#[cfg(bootstrap)] #[unstable(feature = "thin_box", issue = "92791")] impl<T: ?Sized + crate::error::Error> crate::error::Error for crate::boxed::ThinBox<T> { fn source(&self) -> Option<&(dyn crate::error::Error + 'static)> { @@ -727,6 
+642,7 @@ impl<T: ?Sized + crate::error::Error> crate::error::Error for crate::boxed::Thin } } +#[cfg(bootstrap)] #[stable(feature = "error_by_ref", since = "1.51.0")] impl<'a, T: Error + ?Sized> Error for &'a T { #[allow(deprecated, deprecated_in_future)] @@ -748,6 +664,7 @@ impl<'a, T: Error + ?Sized> Error for &'a T { } } +#[cfg(bootstrap)] #[stable(feature = "arc_error", since = "1.52.0")] impl<T: Error + ?Sized> Error for Arc<T> { #[allow(deprecated, deprecated_in_future)] @@ -769,6 +686,7 @@ impl<T: Error + ?Sized> Error for Arc<T> { } } +#[cfg(bootstrap)] #[stable(feature = "fmt_error", since = "1.11.0")] impl Error for fmt::Error { #[allow(deprecated)] @@ -777,6 +695,7 @@ impl Error for fmt::Error { } } +#[cfg(bootstrap)] #[stable(feature = "try_borrow", since = "1.13.0")] impl Error for cell::BorrowError { #[allow(deprecated)] @@ -785,6 +704,7 @@ impl Error for cell::BorrowError { } } +#[cfg(bootstrap)] #[stable(feature = "try_borrow", since = "1.13.0")] impl Error for cell::BorrowMutError { #[allow(deprecated)] @@ -793,6 +713,7 @@ impl Error for cell::BorrowMutError { } } +#[cfg(bootstrap)] #[stable(feature = "try_from", since = "1.34.0")] impl Error for char::CharTryFromError { #[allow(deprecated)] @@ -801,6 +722,7 @@ impl Error for char::CharTryFromError { } } +#[cfg(bootstrap)] #[stable(feature = "char_from_str", since = "1.20.0")] impl Error for char::ParseCharError { #[allow(deprecated)] @@ -809,12 +731,15 @@ impl Error for char::ParseCharError { } } +#[cfg(bootstrap)] #[stable(feature = "try_reserve", since = "1.57.0")] impl Error for alloc::collections::TryReserveError {} +#[cfg(bootstrap)] #[unstable(feature = "duration_checked_float", issue = "83400")] impl Error for time::FromFloatSecsError {} +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl Error for alloc::ffi::NulError { #[allow(deprecated)] @@ -823,6 +748,7 @@ impl Error for alloc::ffi::NulError { } } +#[cfg(bootstrap)] #[stable(feature = "rust1", since = "1.0.0")] impl 
From<alloc::ffi::NulError> for io::Error { /// Converts a [`alloc::ffi::NulError`] into a [`io::Error`]. @@ -831,6 +757,7 @@ impl From<alloc::ffi::NulError> for io::Error { } } +#[cfg(bootstrap)] #[stable(feature = "frombyteswithnulerror_impls", since = "1.17.0")] impl Error for core::ffi::FromBytesWithNulError { #[allow(deprecated)] @@ -839,12 +766,15 @@ impl Error for core::ffi::FromBytesWithNulError { } } +#[cfg(bootstrap)] #[unstable(feature = "cstr_from_bytes_until_nul", issue = "95027")] impl Error for core::ffi::FromBytesUntilNulError {} +#[cfg(bootstrap)] #[stable(feature = "cstring_from_vec_with_nul", since = "1.58.0")] impl Error for alloc::ffi::FromVecWithNulError {} +#[cfg(bootstrap)] #[stable(feature = "cstring_into", since = "1.7.0")] impl Error for alloc::ffi::IntoStringError { #[allow(deprecated)] @@ -857,6 +787,7 @@ impl Error for alloc::ffi::IntoStringError { } } +#[cfg(bootstrap)] impl<'a> dyn Error + 'a { /// Request a reference of type `T` as context about this error. #[unstable(feature = "error_generic_member_access", issue = "99301")] @@ -872,6 +803,7 @@ impl<'a> dyn Error + 'a { } // Copied from `any.rs`. +#[cfg(bootstrap)] impl dyn Error + 'static { /// Returns `true` if the inner type is the same as `T`. #[stable(feature = "error_downcast", since = "1.3.0")] @@ -912,6 +844,7 @@ impl dyn Error + 'static { } } +#[cfg(bootstrap)] impl dyn Error + 'static + Send { /// Forwards to the method defined on the type `dyn Error`. #[stable(feature = "error_downcast", since = "1.3.0")] @@ -947,6 +880,7 @@ impl dyn Error + 'static + Send { } } +#[cfg(bootstrap)] impl dyn Error + 'static + Send + Sync { /// Forwards to the method defined on the type `dyn Error`. #[stable(feature = "error_downcast", since = "1.3.0")] @@ -982,6 +916,7 @@ impl dyn Error + 'static + Send + Sync { } } +#[cfg(bootstrap)] impl dyn Error { #[inline] #[stable(feature = "error_downcast", since = "1.3.0")] @@ -1061,10 +996,12 @@ impl dyn Error { /// its sources, use `skip(1)`. 
#[unstable(feature = "error_iter", issue = "58520")] #[derive(Clone, Debug)] +#[cfg(bootstrap)] pub struct Chain<'a> { current: Option<&'a (dyn Error + 'static)>, } +#[cfg(bootstrap)] #[unstable(feature = "error_iter", issue = "58520")] impl<'a> Iterator for Chain<'a> { type Item = &'a (dyn Error + 'static); @@ -1076,6 +1013,7 @@ impl<'a> Iterator for Chain<'a> { } } +#[cfg(bootstrap)] impl dyn Error + Send { #[inline] #[stable(feature = "error_downcast", since = "1.3.0")] @@ -1089,6 +1027,7 @@ impl dyn Error + Send { } } +#[cfg(bootstrap)] impl dyn Error + Send + Sync { #[inline] #[stable(feature = "error_downcast", since = "1.3.0")] @@ -1246,7 +1185,7 @@ impl dyn Error + Send + Sync { /// # Err(SuperError { source: SuperErrorSideKick }) /// # } /// -/// fn main() -> Result<(), Report> { +/// fn main() -> Result<(), Report<SuperError>> { /// get_super_error()?; /// Ok(()) /// } @@ -1293,7 +1232,7 @@ impl dyn Error + Send + Sync { /// # Err(SuperError { source: SuperErrorSideKick }) /// # } /// -/// fn main() -> Result<(), Report> { +/// fn main() -> Result<(), Report<SuperError>> { /// get_super_error() /// .map_err(Report::from) /// .map_err(|r| r.pretty(true).show_backtrace(true))?; @@ -1605,72 +1544,6 @@ where } } -impl Report<Box<dyn Error>> { - fn backtrace(&self) -> Option<&Backtrace> { - // have to grab the backtrace on the first error directly since that error may not be - // 'static - let backtrace = self.error.request_ref(); - let backtrace = backtrace.or_else(|| { - self.error - .source() - .map(|source| source.chain().find_map(|source| source.request_ref())) - .flatten() - }); - backtrace - } - - /// Format the report as a single line. 
- #[unstable(feature = "error_reporter", issue = "90172")] - fn fmt_singleline(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.error)?; - - let sources = self.error.source().into_iter().flat_map(<dyn Error>::chain); - - for cause in sources { - write!(f, ": {cause}")?; - } - - Ok(()) - } - - /// Format the report as multiple lines, with each error cause on its own line. - #[unstable(feature = "error_reporter", issue = "90172")] - fn fmt_multiline(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let error = &self.error; - - write!(f, "{error}")?; - - if let Some(cause) = error.source() { - write!(f, "\n\nCaused by:")?; - - let multiple = cause.source().is_some(); - - for (ind, error) in cause.chain().enumerate() { - writeln!(f)?; - let mut indented = Indented { inner: f }; - if multiple { - write!(indented, "{ind: >4}: {error}")?; - } else { - write!(indented, " {error}")?; - } - } - } - - if self.show_backtrace { - let backtrace = self.backtrace(); - - if let Some(backtrace) = backtrace { - let backtrace = backtrace.to_string(); - - f.write_str("\n\nStack backtrace:\n")?; - f.write_str(backtrace.trim_end())?; - } - } - - Ok(()) - } -} - #[unstable(feature = "error_reporter", issue = "90172")] impl<E> From<E> for Report<E> where @@ -1682,17 +1555,6 @@ where } #[unstable(feature = "error_reporter", issue = "90172")] -impl<'a, E> From<E> for Report<Box<dyn Error + 'a>> -where - E: Error + 'a, -{ - fn from(error: E) -> Self { - let error = box error; - Report { error, show_backtrace: false, pretty: false } - } -} - -#[unstable(feature = "error_reporter", issue = "90172")] impl<E> fmt::Display for Report<E> where E: Error, @@ -1702,13 +1564,6 @@ where } } -#[unstable(feature = "error_reporter", issue = "90172")] -impl fmt::Display for Report<Box<dyn Error>> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.pretty { self.fmt_multiline(f) } else { self.fmt_singleline(f) } - } -} - // This type intentionally outputs 
the same format for `Display` and `Debug`for // situations where you unwrap a `Report` or return it from main. #[unstable(feature = "error_reporter", issue = "90172")] diff --git a/library/std/src/io/error.rs b/library/std/src/io/error.rs index ff7fdcae16f..885e44f5e18 100644 --- a/library/std/src/io/error.rs +++ b/library/std/src/io/error.rs @@ -76,6 +76,15 @@ impl fmt::Debug for Error { } } +#[cfg(not(bootstrap))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<alloc::ffi::NulError> for Error { + /// Converts a [`alloc::ffi::NulError`] into a [`Error`]. + fn from(_: alloc::ffi::NulError) -> Error { + const_io_error!(ErrorKind::InvalidInput, "data provided contains a nul byte") + } +} + // Only derive debug in tests, to make sure it // doesn't accidentally get printed. #[cfg_attr(test, derive(Debug))] diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 71bbf4317e0..50e3acc9400 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -252,7 +252,7 @@ #![feature(dropck_eyepatch)] #![feature(exhaustive_patterns)] #![feature(intra_doc_pointers)] -#![feature(label_break_value)] +#![cfg_attr(bootstrap, feature(label_break_value))] #![feature(lang_items)] #![feature(let_else)] #![feature(linkage)] @@ -281,6 +281,9 @@ #![feature(cstr_internals)] #![feature(duration_checked_float)] #![feature(duration_constants)] +#![cfg_attr(not(bootstrap), feature(error_generic_member_access))] +#![cfg_attr(not(bootstrap), feature(error_in_core))] +#![cfg_attr(not(bootstrap), feature(error_iter))] #![feature(exact_size_is_empty)] #![feature(exclusive_wrapper)] #![feature(extend_one)] diff --git a/library/std/src/macros.rs b/library/std/src/macros.rs index 0cb21ef53b1..a5003c66fca 100644 --- a/library/std/src/macros.rs +++ b/library/std/src/macros.rs @@ -27,12 +27,23 @@ macro_rules! panic { /// necessary to use [`io::stdout().flush()`][flush] to ensure the output is emitted /// immediately. 
/// +/// The `print!` macro will lock the standard output on each call. If you call +/// `print!` within a hot loop, this behavior may be the bottleneck of the loop. +/// To avoid this, lock stdout with [`io::stdout().lock()`][lock]: +/// ``` +/// use std::io::{stdout, Write}; +/// +/// let mut lock = stdout().lock(); +/// write!(lock, "hello world").unwrap(); +/// ``` +/// /// Use `print!` only for the primary output of your program. Use /// [`eprint!`] instead to print error and progress messages. /// /// [flush]: crate::io::Write::flush /// [`println!`]: crate::println /// [`eprint!`]: crate::eprint +/// [lock]: crate::io::Stdout /// /// # Panics /// @@ -75,11 +86,22 @@ macro_rules! print { /// This macro uses the same syntax as [`format!`], but writes to the standard output instead. /// See [`std::fmt`] for more information. /// +/// The `println!` macro will lock the standard output on each call. If you call +/// `println!` within a hot loop, this behavior may be the bottleneck of the loop. +/// To avoid this, lock stdout with [`io::stdout().lock()`][lock]: +/// ``` +/// use std::io::{stdout, Write}; +/// +/// let mut lock = stdout().lock(); +/// writeln!(lock, "hello world").unwrap(); +/// ``` +/// /// Use `println!` only for the primary output of your program. Use /// [`eprintln!`] instead to print error and progress messages. /// /// [`std::fmt`]: crate::fmt /// [`eprintln!`]: crate::eprintln +/// [lock]: crate::io::Stdout /// /// # Panics /// @@ -93,6 +115,8 @@ macro_rules! 
print { /// println!(); // prints just a newline /// println!("hello there!"); /// println!("format {} arguments", "some"); +/// let local_variable = "some"; +/// println!("format {local_variable} arguments"); /// ``` #[macro_export] #[stable(feature = "rust1", since = "1.0.0")] diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs index 11883f15022..4bdd8c505ff 100644 --- a/library/std/src/sys/windows/os_str.rs +++ b/library/std/src/sys/windows/os_str.rs @@ -164,9 +164,7 @@ impl Slice { } pub fn to_owned(&self) -> Buf { - let mut buf = Wtf8Buf::with_capacity(self.inner.len()); - buf.push_wtf8(&self.inner); - Buf { inner: buf } + Buf { inner: self.inner.to_owned() } } pub fn clone_into(&self, buf: &mut Buf) { diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 33e20756163..dd53767d452 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -89,6 +89,24 @@ impl CodePoint { self.value } + /// Returns the numeric value of the code point if it is a leading surrogate. + #[inline] + pub fn to_lead_surrogate(&self) -> Option<u16> { + match self.value { + lead @ 0xD800..=0xDBFF => Some(lead as u16), + _ => None, + } + } + + /// Returns the numeric value of the code point if it is a trailing surrogate. + #[inline] + pub fn to_trail_surrogate(&self) -> Option<u16> { + match self.value { + trail @ 0xDC00..=0xDFFF => Some(trail as u16), + _ => None, + } + } + /// Optionally returns a Unicode scalar value for the code point. /// /// Returns `None` if the code point is a surrogate (from U+D800 to U+DFFF). @@ -117,6 +135,14 @@ impl CodePoint { #[derive(Eq, PartialEq, Ord, PartialOrd, Clone)] pub struct Wtf8Buf { bytes: Vec<u8>, + + /// Do we know that `bytes` holds a valid UTF-8 encoding? We can easily + /// know this if we're constructed from a `String` or `&str`. 
+ /// + /// It is possible for `bytes` to have valid UTF-8 without this being + /// set, such as when we're concatenating `&Wtf8`'s and surrogates become + /// paired, as we don't bother to rescan the entire string. + is_known_utf8: bool, } impl ops::Deref for Wtf8Buf { @@ -147,13 +173,13 @@ impl Wtf8Buf { /// Creates a new, empty WTF-8 string. #[inline] pub fn new() -> Wtf8Buf { - Wtf8Buf { bytes: Vec::new() } + Wtf8Buf { bytes: Vec::new(), is_known_utf8: true } } /// Creates a new, empty WTF-8 string with pre-allocated capacity for `capacity` bytes. #[inline] pub fn with_capacity(capacity: usize) -> Wtf8Buf { - Wtf8Buf { bytes: Vec::with_capacity(capacity) } + Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true } } /// Creates a WTF-8 string from a UTF-8 `String`. @@ -163,7 +189,7 @@ impl Wtf8Buf { /// Since WTF-8 is a superset of UTF-8, this always succeeds. #[inline] pub fn from_string(string: String) -> Wtf8Buf { - Wtf8Buf { bytes: string.into_bytes() } + Wtf8Buf { bytes: string.into_bytes(), is_known_utf8: true } } /// Creates a WTF-8 string from a UTF-8 `&str` slice. @@ -173,11 +199,12 @@ impl Wtf8Buf { /// Since WTF-8 is a superset of UTF-8, this always succeeds. #[inline] pub fn from_str(str: &str) -> Wtf8Buf { - Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()) } + Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()), is_known_utf8: true } } pub fn clear(&mut self) { - self.bytes.clear() + self.bytes.clear(); + self.is_known_utf8 = true; } /// Creates a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units. @@ -193,9 +220,11 @@ impl Wtf8Buf { let surrogate = surrogate.unpaired_surrogate(); // Surrogates are known to be in the code point range. let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) }; + // The string will now contain an unpaired surrogate. 
+ string.is_known_utf8 = false; // Skip the WTF-8 concatenation check, // surrogate pairs are already decoded by decode_utf16 - string.push_code_point_unchecked(code_point) + string.push_code_point_unchecked(code_point); } } } @@ -203,7 +232,7 @@ impl Wtf8Buf { } /// Copied from String::push - /// This does **not** include the WTF-8 concatenation check. + /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. fn push_code_point_unchecked(&mut self, code_point: CodePoint) { let mut bytes = [0; 4]; let bytes = char::encode_utf8_raw(code_point.value, &mut bytes); @@ -217,6 +246,9 @@ impl Wtf8Buf { #[inline] pub fn as_mut_slice(&mut self) -> &mut Wtf8 { + // Safety: `Wtf8` doesn't expose any way to mutate the bytes that would + // cause them to change from well-formed UTF-8 to ill-formed UTF-8, + // which would break the assumptions of the `is_known_utf8` field. unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) } } @@ -314,7 +346,15 @@ impl Wtf8Buf { self.push_char(decode_surrogate_pair(lead, trail)); self.bytes.extend_from_slice(other_without_trail_surrogate); } - _ => self.bytes.extend_from_slice(&other.bytes), + _ => { + // If we'll be pushing a string containing a surrogate, we may + // no longer have UTF-8. + if other.next_surrogate(0).is_some() { + self.is_known_utf8 = false; + } + + self.bytes.extend_from_slice(&other.bytes); + } } } @@ -331,13 +371,19 @@ impl Wtf8Buf { /// like concatenating ill-formed UTF-16 strings effectively would. #[inline] pub fn push(&mut self, code_point: CodePoint) { - if let trail @ 0xDC00..=0xDFFF = code_point.to_u32() { + if let Some(trail) = code_point.to_trail_surrogate() { if let Some(lead) = (&*self).final_lead_surrogate() { let len_without_lead_surrogate = self.len() - 3; self.bytes.truncate(len_without_lead_surrogate); - self.push_char(decode_surrogate_pair(lead, trail as u16)); + self.push_char(decode_surrogate_pair(lead, trail)); return; } + + // We're pushing a trailing surrogate. 
+ self.is_known_utf8 = false; + } else if code_point.to_lead_surrogate().is_some() { + // We're pushing a leading surrogate. + self.is_known_utf8 = false; } // No newly paired surrogates at the boundary. @@ -364,9 +410,10 @@ impl Wtf8Buf { /// (that is, if the string contains surrogates), /// the original WTF-8 string is returned instead. pub fn into_string(self) -> Result<String, Wtf8Buf> { - match self.next_surrogate(0) { - None => Ok(unsafe { String::from_utf8_unchecked(self.bytes) }), - Some(_) => Err(self), + if self.is_known_utf8 || self.next_surrogate(0).is_none() { + Ok(unsafe { String::from_utf8_unchecked(self.bytes) }) + } else { + Err(self) } } @@ -376,6 +423,11 @@ impl Wtf8Buf { /// /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character āļæ½ā) pub fn into_string_lossy(mut self) -> String { + // Fast path: If we already have UTF-8, we can return it immediately. + if self.is_known_utf8 { + return unsafe { String::from_utf8_unchecked(self.bytes) }; + } + let mut pos = 0; loop { match self.next_surrogate(pos) { @@ -398,7 +450,7 @@ impl Wtf8Buf { /// Converts a `Box<Wtf8>` into a `Wtf8Buf`. pub fn from_box(boxed: Box<Wtf8>) -> Wtf8Buf { let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) }; - Wtf8Buf { bytes: bytes.into_vec() } + Wtf8Buf { bytes: bytes.into_vec(), is_known_utf8: false } } } @@ -576,6 +628,11 @@ impl Wtf8 { } } + /// Creates an owned `Wtf8Buf` from a borrowed `Wtf8`. + pub fn to_owned(&self) -> Wtf8Buf { + Wtf8Buf { bytes: self.bytes.to_vec(), is_known_utf8: false } + } + /// Lossily converts the string to UTF-8. /// Returns a UTF-8 `&str` slice if the contents are well-formed in UTF-8. /// @@ -665,7 +722,8 @@ impl Wtf8 { } pub fn clone_into(&self, buf: &mut Wtf8Buf) { - self.bytes.clone_into(&mut buf.bytes) + buf.is_known_utf8 = false; + self.bytes.clone_into(&mut buf.bytes); } /// Boxes this `Wtf8`. 
@@ -705,12 +763,12 @@ impl Wtf8 { #[inline] pub fn to_ascii_lowercase(&self) -> Wtf8Buf { - Wtf8Buf { bytes: self.bytes.to_ascii_lowercase() } + Wtf8Buf { bytes: self.bytes.to_ascii_lowercase(), is_known_utf8: false } } #[inline] pub fn to_ascii_uppercase(&self) -> Wtf8Buf { - Wtf8Buf { bytes: self.bytes.to_ascii_uppercase() } + Wtf8Buf { bytes: self.bytes.to_ascii_uppercase(), is_known_utf8: false } } #[inline] diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 931996791fb..1a302d64694 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -20,6 +20,36 @@ fn code_point_to_u32() { } #[test] +fn code_point_to_lead_surrogate() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0).to_lead_surrogate(), None); + assert_eq!(c(0xE9).to_lead_surrogate(), None); + assert_eq!(c(0xD800).to_lead_surrogate(), Some(0xD800)); + assert_eq!(c(0xDBFF).to_lead_surrogate(), Some(0xDBFF)); + assert_eq!(c(0xDC00).to_lead_surrogate(), None); + assert_eq!(c(0xDFFF).to_lead_surrogate(), None); + assert_eq!(c(0x1F4A9).to_lead_surrogate(), None); + assert_eq!(c(0x10FFFF).to_lead_surrogate(), None); +} + +#[test] +fn code_point_to_trail_surrogate() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0).to_trail_surrogate(), None); + assert_eq!(c(0xE9).to_trail_surrogate(), None); + assert_eq!(c(0xD800).to_trail_surrogate(), None); + assert_eq!(c(0xDBFF).to_trail_surrogate(), None); + assert_eq!(c(0xDC00).to_trail_surrogate(), Some(0xDC00)); + assert_eq!(c(0xDFFF).to_trail_surrogate(), Some(0xDFFF)); + assert_eq!(c(0x1F4A9).to_trail_surrogate(), None); + assert_eq!(c(0x10FFFF).to_trail_surrogate(), None); +} + +#[test] fn code_point_from_char() { assert_eq!(CodePoint::from_char('a').to_u32(), 0x61); assert_eq!(CodePoint::from_char('š©').to_u32(), 0x1F4A9); @@ -70,35 +100,66 @@ fn wtf8buf_from_string() { #[test] 
fn wtf8buf_from_wide() { - assert_eq!(Wtf8Buf::from_wide(&[]).bytes, b""); - assert_eq!( - Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes, - b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9" - ); + let buf = Wtf8Buf::from_wide(&[]); + assert_eq!(buf.bytes, b""); + assert!(buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xDCA9]); + assert_eq!(buf.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]); + assert_eq!(buf.bytes, b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xD800]); + assert_eq!(buf.bytes, b"\xED\xA0\x80"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDBFF]); + assert_eq!(buf.bytes, b"\xED\xAF\xBF"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDC00]); + assert_eq!(buf.bytes, b"\xED\xB0\x80"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDFFF]); + assert_eq!(buf.bytes, b"\xED\xBF\xBF"); + assert!(!buf.is_known_utf8); } #[test] fn wtf8buf_push_str() { let mut string = Wtf8Buf::new(); assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); + string.push_str("aĆ© š©"); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); } #[test] fn wtf8buf_push_char() { let mut string = Wtf8Buf::from_str("aĆ© "); assert_eq!(string.bytes, b"a\xC3\xA9 "); + assert!(string.is_known_utf8); + string.push_char('š©'); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); } #[test] fn wtf8buf_push() { let mut string = Wtf8Buf::from_str("aĆ© "); assert_eq!(string.bytes, b"a\xC3\xA9 "); + assert!(string.is_known_utf8); + string.push(CodePoint::from_char('š©')); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() @@ -106,37 
+167,46 @@ fn wtf8buf_push() { let mut string = Wtf8Buf::new(); string.push(c(0xD83D)); // lead + assert!(!string.is_known_utf8); string.push(c(0xDCA9)); // trail assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic! let mut string = Wtf8Buf::new(); string.push(c(0xD83D)); // lead + assert!(!string.is_known_utf8); string.push(c(0x20)); // not surrogate string.push(c(0xDCA9)); // trail assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); let mut string = Wtf8Buf::new(); string.push(c(0xD800)); // lead + assert!(!string.is_known_utf8); string.push(c(0xDBFF)); // lead assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); let mut string = Wtf8Buf::new(); string.push(c(0xD800)); // lead + assert!(!string.is_known_utf8); string.push(c(0xE000)); // not surrogate assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); let mut string = Wtf8Buf::new(); string.push(c(0xD7FF)); // not surrogate + assert!(string.is_known_utf8); string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); let mut string = Wtf8Buf::new(); string.push(c(0x61)); // not surrogate, < 3 bytes + assert!(string.is_known_utf8); string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); let mut string = Wtf8Buf::new(); string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); assert_eq!(string.bytes, b"\xED\xB0\x80"); } @@ -146,6 +216,7 @@ fn wtf8buf_push_wtf8() { assert_eq!(string.bytes, b"a\xC3\xA9"); string.push_wtf8(Wtf8::from_str(" š©")); assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } @@ -161,37 +232,68 @@ fn wtf8buf_push_wtf8() { string.push_wtf8(w(b" ")); // not surrogate string.push_wtf8(w(b"\xED\xB2\xA9")); // trail assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); 
string.push_wtf8(w(b"\xED\xA0\x80")); // lead string.push_wtf8(w(b"\xED\xAF\xBF")); // lead assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\xA0\x80")); // lead string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate string.push_wtf8(w(b"\xED\xB0\x80")); // trail assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes string.push_wtf8(w(b"\xED\xB0\x80")); // trail assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); + assert!(!string.is_known_utf8); let mut string = Wtf8Buf::new(); string.push_wtf8(w(b"\xED\xB0\x80")); // trail assert_eq!(string.bytes, b"\xED\xB0\x80"); + assert!(!string.is_known_utf8); } #[test] fn wtf8buf_truncate() { let mut string = Wtf8Buf::from_str("aĆ©"); + assert!(string.is_known_utf8); + + string.truncate(3); + assert_eq!(string.bytes, b"a\xC3\xA9"); + assert!(string.is_known_utf8); + string.truncate(1); assert_eq!(string.bytes, b"a"); + assert!(string.is_known_utf8); + + string.truncate(0); + assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); +} + +#[test] +fn wtf8buf_truncate_around_non_bmp() { + let mut string = Wtf8Buf::from_str("š©"); + assert!(string.is_known_utf8); + + string.truncate(4); + assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); + + string.truncate(0); + assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); } #[test] @@ -209,10 +311,36 @@ fn wtf8buf_truncate_fail_longer() { } #[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp3() { + let mut string = Wtf8Buf::from_str("š©"); + assert!(string.is_known_utf8); + string.truncate(3); +} + +#[test] +#[should_panic] +fn 
wtf8buf_truncate_splitting_non_bmp2() { + let mut string = Wtf8Buf::from_str("💩"); + assert!(string.is_known_utf8); + string.truncate(2); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp1() { + let mut string = Wtf8Buf::from_str("💩"); + assert!(string.is_known_utf8); + string.truncate(1); +} + +#[test] fn wtf8buf_into_string() { let mut string = Wtf8Buf::from_str("aé 💩"); + assert!(string.is_known_utf8); assert_eq!(string.clone().into_string(), Ok(String::from("aé 💩"))); string.push(CodePoint::from_u32(0xD800).unwrap()); + assert!(!string.is_known_utf8); assert_eq!(string.clone().into_string(), Err(string)); } @@ -229,15 +357,33 @@ fn wtf8buf_from_iterator() { fn f(values: &[u32]) -> Wtf8Buf { values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>() } - assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert_eq!( + f(&[0x61, 0xE9, 0x20, 0x1F4A9]), + Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true } + ); assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! 
- assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); - assert_eq!(f(&[0xD800, 0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF"); - assert_eq!(f(&[0xD800, 0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80"); - assert_eq!(f(&[0xD7FF, 0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80"); - assert_eq!(f(&[0x61, 0xDC00]).bytes, b"\x61\xED\xB0\x80"); - assert_eq!(f(&[0xDC00]).bytes, b"\xED\xB0\x80"); + assert_eq!( + f(&[0xD83D, 0x20, 0xDCA9]), + Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD800, 0xDBFF]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD800, 0xE000]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD7FF, 0xDC00]), + Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0x61, 0xDC00]), + Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!(f(&[0xDC00]), Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false }); } #[test] @@ -251,15 +397,36 @@ fn wtf8buf_extend() { string } - assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert_eq!( + e(&[0x61, 0xE9], &[0x20, 0x1F4A9]), + Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true } + ); assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! 
- assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); - assert_eq!(e(&[0xD800], &[0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF"); - assert_eq!(e(&[0xD800], &[0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80"); - assert_eq!(e(&[0xD7FF], &[0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80"); - assert_eq!(e(&[0x61], &[0xDC00]).bytes, b"\x61\xED\xB0\x80"); - assert_eq!(e(&[], &[0xDC00]).bytes, b"\xED\xB0\x80"); + assert_eq!( + e(&[0xD83D, 0x20], &[0xDCA9]), + Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD800], &[0xDBFF]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD800], &[0xE000]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD7FF], &[0xDC00]), + Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0x61], &[0xDC00]), + Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[], &[0xDC00]), + Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); } #[test] @@ -407,3 +574,93 @@ fn wtf8_encode_wide_size_hint() { assert_eq!((0, Some(0)), iter.size_hint()); assert!(iter.next().is_none()); } + +#[test] +fn wtf8_clone_into() { + let mut string = Wtf8Buf::new(); + Wtf8::from_str("green").clone_into(&mut string); + assert_eq!(string.bytes, b"green"); + + let mut string = Wtf8Buf::from_str("green"); + Wtf8::from_str("").clone_into(&mut string); + assert_eq!(string.bytes, b""); + + let mut string = Wtf8Buf::from_str("red"); + Wtf8::from_str("green").clone_into(&mut string); + assert_eq!(string.bytes, b"green"); + + let mut string = Wtf8Buf::from_str("green"); + Wtf8::from_str("red").clone_into(&mut string); + assert_eq!(string.bytes, b"red"); + + let mut string = Wtf8Buf::from_str("green"); + assert!(string.is_known_utf8); + unsafe { 
Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").clone_into(&mut string) }; + assert_eq!(string.bytes, b"\xED\xA0\x80"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wtf8_to_ascii_lowercase() { + let lowercase = Wtf8::from_str("").to_ascii_lowercase(); + assert_eq!(lowercase.bytes, b""); + + let lowercase = Wtf8::from_str("GrEeN gRaPeS! 🍇").to_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87"); + + let lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_lowercase() }; + assert_eq!(lowercase.bytes, b"\xED\xA0\x80"); + assert!(!lowercase.is_known_utf8); +} + +#[test] +fn wtf8_to_ascii_uppercase() { + let uppercase = Wtf8::from_str("").to_ascii_uppercase(); + assert_eq!(uppercase.bytes, b""); + + let uppercase = Wtf8::from_str("GrEeN gRaPeS! 🍇").to_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87"); + + let uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_uppercase() }; + assert_eq!(uppercase.bytes, b"\xED\xA0\x80"); + assert!(!uppercase.is_known_utf8); +} + +#[test] +fn wtf8_make_ascii_lowercase() { + let mut lowercase = Wtf8Buf::from_str(""); + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b""); + + let mut lowercase = Wtf8Buf::from_str("GrEeN gRaPeS! 🍇"); + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87"); + + let mut lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"\xED\xA0\x80"); + assert!(!lowercase.is_known_utf8); +} + +#[test] +fn wtf8_make_ascii_uppercase() { + let mut uppercase = Wtf8Buf::from_str(""); + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b""); + + let mut uppercase = Wtf8Buf::from_str("GrEeN gRaPeS! 🍇"); + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"GREEN GRAPES! 
\xf0\x9f\x8d\x87"); + + let mut uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"\xED\xA0\x80"); + assert!(!uppercase.is_known_utf8); +} + +#[test] +fn wtf8_to_owned() { + let string = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert_eq!(string.bytes, b"\xED\xA0\x80"); + assert!(!string.is_known_utf8); +} |
