diff options
Diffstat (limited to 'library/std/src')
| -rw-r--r-- | library/std/src/ffi/mod.rs | 8 | ||||
| -rw-r--r-- | library/std/src/ffi/os_str.rs | 82 | ||||
| -rw-r--r-- | library/std/src/os/windows/io/handle.rs | 6 | ||||
| -rw-r--r-- | library/std/src/os/windows/io/socket.rs | 6 | ||||
| -rw-r--r-- | library/std/src/panicking.rs | 120 | ||||
| -rw-r--r-- | library/std/src/path.rs | 52 | ||||
| -rw-r--r-- | library/std/src/sys/common/small_c_string.rs | 2 | ||||
| -rw-r--r-- | library/std/src/sys/common/tests.rs | 4 | ||||
| -rw-r--r-- | library/std/src/sys/unix/os_str.rs | 9 | ||||
| -rw-r--r-- | library/std/src/sys/unix/os_str/tests.rs | 9 | ||||
| -rw-r--r-- | library/std/src/sys/unix/path.rs | 2 | ||||
| -rw-r--r-- | library/std/src/sys/unix/process/process_common.rs | 4 | ||||
| -rw-r--r-- | library/std/src/sys/wasi/fd.rs | 6 | ||||
| -rw-r--r-- | library/std/src/sys/wasi/fs.rs | 4 | ||||
| -rw-r--r-- | library/std/src/sys/windows/args.rs | 6 | ||||
| -rw-r--r-- | library/std/src/sys/windows/os_str.rs | 10 | ||||
| -rw-r--r-- | library/std/src/sys/windows/path.rs | 42 | ||||
| -rw-r--r-- | library/std/src/sys/windows/process.rs | 4 | ||||
| -rw-r--r-- | library/std/src/sys_common/wtf8.rs | 8 |
19 files changed, 237 insertions, 147 deletions
diff --git a/library/std/src/ffi/mod.rs b/library/std/src/ffi/mod.rs index d987bf69b25..3ddb8748753 100644 --- a/library/std/src/ffi/mod.rs +++ b/library/std/src/ffi/mod.rs @@ -127,6 +127,14 @@ //! trait, which provides a [`from_wide`] method to convert a native Windows //! string (without the terminating nul character) to an [`OsString`]. //! +//! ## On all platforms +//! +//! On all platforms, [`OsStr`] consists of a sequence of bytes that is encoded as a superset of +//! UTF-8; see [`OsString`] for more details on its encoding on different platforms. +//! +//! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_os_str_bytes`] and +//! [`OsStr::from_os_str_bytes_unchecked`]. +//! //! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value //! [Unicode code point]: https://www.unicode.org/glossary/#code_point //! [`env::set_var()`]: crate::env::set_var "env::set_var" diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index 5c0541d3caf..bb577acf769 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -667,6 +667,51 @@ impl OsStr { s.as_ref() } + /// Converts a slice of bytes to an OS string slice without checking that the string contains + /// valid `OsStr`-encoded data. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// See the [module's toplevel documentation about conversions][conversions] for safe, + /// cross-platform [conversions] from/to native representations. + /// + /// # Safety + /// + /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of + /// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version + /// built for the same target platform. For example, reconstructing an `OsStr` from bytes sent + /// over the network or stored in a file will likely violate these safety rules. + /// + /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be + /// split either immediately before or immediately after any valid non-empty UTF-8 substring. + /// + /// # Example + /// + /// ``` + /// #![feature(os_str_bytes)] + /// + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("Mary had a little lamb"); + /// let bytes = os_str.as_os_str_bytes(); + /// let words = bytes.split(|b| *b == b' '); + /// let words: Vec<&OsStr> = words.map(|word| { + /// // SAFETY: + /// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes` + /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring + /// unsafe { OsStr::from_os_str_bytes_unchecked(word) } + /// }).collect(); + /// ``` + /// + /// [conversions]: super#conversions + #[inline] + #[unstable(feature = "os_str_bytes", issue = "111544")] + pub unsafe fn from_os_str_bytes_unchecked(bytes: &[u8]) -> &Self { + Self::from_inner(Slice::from_os_str_bytes_unchecked(bytes)) + } + #[inline] fn from_inner(inner: &Slice) -> &OsStr { // SAFETY: OsStr is just a wrapper of Slice, @@ -837,13 +882,24 @@ impl OsStr { OsString { inner: Buf::from_box(boxed) } } - /// Gets the underlying byte representation. + /// Converts an OS string slice to a byte slice. To convert the byte slice back into an OS + /// string slice, use the [`OsStr::from_os_str_bytes_unchecked`] function. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should + /// be treated as opaque and only comparable within the same rust version built for the same + /// target platform. For example, sending the slice over the network or storing it in a file + /// will likely result in incompatible byte slices. See [`OsString`] for more encoding details + /// and [`std::ffi`] for platform-specific, specified conversions. /// - /// Note: it is *crucial* that this API is not externally public, to avoid - /// revealing the internal, platform-specific encodings. + /// [`std::ffi`]: crate::ffi #[inline] - pub(crate) fn bytes(&self) -> &[u8] { - unsafe { &*(&self.inner as *const _ as *const [u8]) } + #[unstable(feature = "os_str_bytes", issue = "111544")] + pub fn as_os_str_bytes(&self) -> &[u8] { + self.inner.as_os_str_bytes() } /// Converts this string to its ASCII lower case equivalent in-place. @@ -1131,7 +1187,7 @@ impl Default for &OsStr { impl PartialEq for OsStr { #[inline] fn eq(&self, other: &OsStr) -> bool { - self.bytes().eq(other.bytes()) + self.as_os_str_bytes().eq(other.as_os_str_bytes()) } } @@ -1158,23 +1214,23 @@ impl Eq for OsStr {} impl PartialOrd for OsStr { #[inline] fn partial_cmp(&self, other: &OsStr) -> Option<cmp::Ordering> { - self.bytes().partial_cmp(other.bytes()) + self.as_os_str_bytes().partial_cmp(other.as_os_str_bytes()) } #[inline] fn lt(&self, other: &OsStr) -> bool { - self.bytes().lt(other.bytes()) + self.as_os_str_bytes().lt(other.as_os_str_bytes()) } #[inline] fn le(&self, other: &OsStr) -> bool { - self.bytes().le(other.bytes()) + self.as_os_str_bytes().le(other.as_os_str_bytes()) } #[inline] fn gt(&self, other: &OsStr) -> bool { - self.bytes().gt(other.bytes()) + self.as_os_str_bytes().gt(other.as_os_str_bytes()) } #[inline] fn ge(&self, other: &OsStr) -> bool { - self.bytes().ge(other.bytes()) + self.as_os_str_bytes().ge(other.as_os_str_bytes()) } } @@ -1193,7 +1249,7 @@ impl PartialOrd<str> for OsStr { impl Ord for OsStr { #[inline] fn cmp(&self, other: &OsStr) -> cmp::Ordering { - self.bytes().cmp(other.bytes()) + self.as_os_str_bytes().cmp(other.as_os_str_bytes()) } } @@ -1243,7 +1299,7 @@ impl_cmp!(Cow<'a, OsStr>, OsString); impl Hash for OsStr { #[inline] fn hash<H: Hasher>(&self, state: &mut H) { - self.bytes().hash(state) + self.as_os_str_bytes().hash(state) } } diff --git a/library/std/src/os/windows/io/handle.rs b/library/std/src/os/windows/io/handle.rs index cbf8209a5ad..274af08a388 100644 --- a/library/std/src/os/windows/io/handle.rs +++ b/library/std/src/os/windows/io/handle.rs @@ -437,7 +437,7 @@ impl<T: AsHandle> AsHandle for &mut T { } } -#[stable(feature = "as_windows_ptrs", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "as_windows_ptrs", since = "1.71.0")] /// This impl allows implementing traits that require `AsHandle` on Arc. /// ``` /// # #[cfg(windows)] mod group_cfg { @@ -457,7 +457,7 @@ impl<T: AsHandle> AsHandle for crate::sync::Arc<T> { } } -#[stable(feature = "as_windows_ptrs", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "as_windows_ptrs", since = "1.71.0")] impl<T: AsHandle> AsHandle for crate::rc::Rc<T> { #[inline] fn as_handle(&self) -> BorrowedHandle<'_> { @@ -465,7 +465,7 @@ impl<T: AsHandle> AsHandle for crate::rc::Rc<T> { } } -#[stable(feature = "as_windows_ptrs", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "as_windows_ptrs", since = "1.71.0")] impl<T: AsHandle> AsHandle for Box<T> { #[inline] fn as_handle(&self) -> BorrowedHandle<'_> { diff --git a/library/std/src/os/windows/io/socket.rs b/library/std/src/os/windows/io/socket.rs index 0c90d55c024..6359835cad5 100644 --- a/library/std/src/os/windows/io/socket.rs +++ b/library/std/src/os/windows/io/socket.rs @@ -254,7 +254,7 @@ impl<T: AsSocket> AsSocket for &mut T { } } -#[stable(feature = "as_windows_ptrs", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "as_windows_ptrs", since = "1.71.0")] /// This impl allows implementing traits that require `AsSocket` on Arc. /// ``` /// # #[cfg(windows)] mod group_cfg { @@ -274,7 +274,7 @@ impl<T: AsSocket> AsSocket for crate::sync::Arc<T> { } } -#[stable(feature = "as_windows_ptrs", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "as_windows_ptrs", since = "1.71.0")] impl<T: AsSocket> AsSocket for crate::rc::Rc<T> { #[inline] fn as_socket(&self) -> BorrowedSocket<'_> { @@ -282,7 +282,7 @@ impl<T: AsSocket> AsSocket for crate::rc::Rc<T> { } } -#[stable(feature = "as_windows_ptrs", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "as_windows_ptrs", since = "1.71.0")] impl<T: AsSocket> AsSocket for Box<T> { #[inline] fn as_socket(&self) -> BorrowedSocket<'_> { diff --git a/library/std/src/panicking.rs b/library/std/src/panicking.rs index 6d59266b6f8..a6a370409c0 100644 --- a/library/std/src/panicking.rs +++ b/library/std/src/panicking.rs @@ -298,8 +298,18 @@ pub mod panic_count { pub const ALWAYS_ABORT_FLAG: usize = 1 << (usize::BITS - 1); - // Panic count for the current thread. - thread_local! { static LOCAL_PANIC_COUNT: Cell<usize> = const { Cell::new(0) } } + /// A reason for forcing an immediate abort on panic. + #[derive(Debug)] + pub enum MustAbort { + AlwaysAbort, + PanicInHook, + } + + // Panic count for the current thread and whether a panic hook is currently + // being executed.. + thread_local! { + static LOCAL_PANIC_COUNT: Cell<(usize, bool)> = const { Cell::new((0, false)) } + } // Sum of panic counts from all threads. The purpose of this is to have // a fast path in `count_is_zero` (which is used by `panicking`). In any particular @@ -328,34 +338,39 @@ pub mod panic_count { // panicking thread consumes at least 2 bytes of address space. static GLOBAL_PANIC_COUNT: AtomicUsize = AtomicUsize::new(0); - // Return the state of the ALWAYS_ABORT_FLAG and number of panics. + // Increases the global and local panic count, and returns whether an + // immediate abort is required. // - // If ALWAYS_ABORT_FLAG is not set, the number is determined on a per-thread - // base (stored in LOCAL_PANIC_COUNT), i.e. it is the amount of recursive calls - // of the calling thread. - // If ALWAYS_ABORT_FLAG is set, the number equals the *global* number of panic - // calls. See above why LOCAL_PANIC_COUNT is not used. - pub fn increase() -> (bool, usize) { + // This also updates thread-local state to keep track of whether a panic + // hook is currently executing. + pub fn increase(run_panic_hook: bool) -> Option<MustAbort> { let global_count = GLOBAL_PANIC_COUNT.fetch_add(1, Ordering::Relaxed); - let must_abort = global_count & ALWAYS_ABORT_FLAG != 0; - let panics = if must_abort { - global_count & !ALWAYS_ABORT_FLAG - } else { - LOCAL_PANIC_COUNT.with(|c| { - let next = c.get() + 1; - c.set(next); - next - }) - }; - (must_abort, panics) + if global_count & ALWAYS_ABORT_FLAG != 0 { + return Some(MustAbort::AlwaysAbort); + } + + LOCAL_PANIC_COUNT.with(|c| { + let (count, in_panic_hook) = c.get(); + if in_panic_hook { + return Some(MustAbort::PanicInHook); + } + c.set((count + 1, run_panic_hook)); + None + }) + } + + pub fn finished_panic_hook() { + LOCAL_PANIC_COUNT.with(|c| { + let (count, _) = c.get(); + c.set((count, false)); + }); } pub fn decrease() { GLOBAL_PANIC_COUNT.fetch_sub(1, Ordering::Relaxed); LOCAL_PANIC_COUNT.with(|c| { - let next = c.get() - 1; - c.set(next); - next + let (count, _) = c.get(); + c.set((count - 1, false)); }); } @@ -366,7 +381,7 @@ pub mod panic_count { // Disregards ALWAYS_ABORT_FLAG #[must_use] pub fn get_count() -> usize { - LOCAL_PANIC_COUNT.with(|c| c.get()) + LOCAL_PANIC_COUNT.with(|c| c.get().0) } // Disregards ALWAYS_ABORT_FLAG @@ -394,7 +409,7 @@ pub mod panic_count { #[inline(never)] #[cold] fn is_zero_slow_path() -> bool { - LOCAL_PANIC_COUNT.with(|c| c.get() == 0) + LOCAL_PANIC_COUNT.with(|c| c.get().0 == 0) } } @@ -655,23 +670,22 @@ fn rust_panic_with_hook( location: &Location<'_>, can_unwind: bool, ) -> ! { - let (must_abort, panics) = panic_count::increase(); - - // If this is the third nested call (e.g., panics == 2, this is 0-indexed), - // the panic hook probably triggered the last panic, otherwise the - // double-panic check would have aborted the process. In this case abort the - // process real quickly as we don't want to try calling it again as it'll - // probably just panic again. - if must_abort || panics > 2 { - if panics > 2 { - // Don't try to print the message in this case - // - perhaps that is causing the recursive panics. - rtprintpanic!("thread panicked while processing panic. aborting.\n"); - } else { - // Unfortunately, this does not print a backtrace, because creating - // a `Backtrace` will allocate, which we must to avoid here. - let panicinfo = PanicInfo::internal_constructor(message, location, can_unwind); - rtprintpanic!("{panicinfo}\npanicked after panic::always_abort(), aborting.\n"); + let must_abort = panic_count::increase(true); + + // Check if we need to abort immediately. + if let Some(must_abort) = must_abort { + match must_abort { + panic_count::MustAbort::PanicInHook => { + // Don't try to print the message in this case + // - perhaps that is causing the recursive panics. + rtprintpanic!("thread panicked while processing panic. aborting.\n"); + } + panic_count::MustAbort::AlwaysAbort => { + // Unfortunately, this does not print a backtrace, because creating + // a `Backtrace` will allocate, which we must to avoid here. + let panicinfo = PanicInfo::internal_constructor(message, location, can_unwind); + rtprintpanic!("{panicinfo}\npanicked after panic::always_abort(), aborting.\n"); + } } crate::sys::abort_internal(); } @@ -697,16 +711,16 @@ fn rust_panic_with_hook( }; drop(hook); - if panics > 1 || !can_unwind { - // If a thread panics while it's already unwinding then we - // have limited options. Currently our preference is to - // just abort. In the future we may consider resuming - // unwinding or otherwise exiting the thread cleanly. - if !can_unwind { - rtprintpanic!("thread caused non-unwinding panic. aborting.\n"); - } else { - rtprintpanic!("thread panicked while panicking. aborting.\n"); - } + // Indicate that we have finished executing the panic hook. After this point + // it is fine if there is a panic while executing destructors, as long as it + // it contained within a `catch_unwind`. + panic_count::finished_panic_hook(); + + if !can_unwind { + // If a thread panics while running destructors or tries to unwind + // through a nounwind function (e.g. extern "C") then we cannot continue + // unwinding and have to abort immediately. + rtprintpanic!("thread caused non-unwinding panic. aborting.\n"); crate::sys::abort_internal(); } @@ -716,7 +730,7 @@ fn rust_panic_with_hook( /// This is the entry point for `resume_unwind`. /// It just forwards the payload to the panic runtime. pub fn rust_panic_without_hook(payload: Box<dyn Any + Send>) -> ! { - panic_count::increase(); + panic_count::increase(false); struct RewrapBox(Box<dyn Any + Send>); diff --git a/library/std/src/path.rs b/library/std/src/path.rs index febdeb51463..28cd3c4e4db 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -193,7 +193,7 @@ impl<'a> Prefix<'a> { fn len(&self) -> usize { use self::Prefix::*; fn os_str_len(s: &OsStr) -> usize { - s.bytes().len() + s.as_os_str_bytes().len() } match *self { Verbatim(x) => 4 + os_str_len(x), @@ -299,20 +299,6 @@ where } } -unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr { - // SAFETY: See note at the top of this module to understand why this and - // `OsStr::bytes` are used: - // - // This casts are safe as OsStr is internally a wrapper around [u8] on all - // platforms. - // - // Note that currently this relies on the special knowledge that std has; - // these types are single-element structs but are not marked - // repr(transparent) or repr(C) which would make these casts not allowable - // outside std. - unsafe { &*(s as *const [u8] as *const OsStr) } -} - // Detect scheme on Redox fn has_redox_scheme(s: &[u8]) -> bool { cfg!(target_os = "redox") && s.contains(&b':') @@ -330,7 +316,7 @@ fn has_physical_root(s: &[u8], prefix: Option<Prefix<'_>>) -> bool { // basic workhorse for splitting stem and extension fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) { - if file.bytes() == b".." { + if file.as_os_str_bytes() == b".." { return (Some(file), None); } @@ -338,18 +324,23 @@ fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) { // and back. This is safe to do because (1) we only look at ASCII // contents of the encoding and (2) new &OsStr values are produced // only from ASCII-bounded slices of existing &OsStr values. - let mut iter = file.bytes().rsplitn(2, |b| *b == b'.'); + let mut iter = file.as_os_str_bytes().rsplitn(2, |b| *b == b'.'); let after = iter.next(); let before = iter.next(); if before == Some(b"") { (Some(file), None) } else { - unsafe { (before.map(|s| u8_slice_as_os_str(s)), after.map(|s| u8_slice_as_os_str(s))) } + unsafe { + ( + before.map(|s| OsStr::from_os_str_bytes_unchecked(s)), + after.map(|s| OsStr::from_os_str_bytes_unchecked(s)), + ) + } } } fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) { - let slice = file.bytes(); + let slice = file.as_os_str_bytes(); if slice == b".." { return (file, None); } @@ -364,7 +355,12 @@ fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) { }; let before = &slice[..i]; let after = &slice[i + 1..]; - unsafe { (u8_slice_as_os_str(before), Some(u8_slice_as_os_str(after))) } + unsafe { + ( + OsStr::from_os_str_bytes_unchecked(before), + Some(OsStr::from_os_str_bytes_unchecked(after)), + ) + } } //////////////////////////////////////////////////////////////////////////////// @@ -743,7 +739,7 @@ impl<'a> Components<'a> { // separately via `include_cur_dir` b".." => Some(Component::ParentDir), b"" => None, - _ => Some(Component::Normal(unsafe { u8_slice_as_os_str(comp) })), + _ => Some(Component::Normal(unsafe { OsStr::from_os_str_bytes_unchecked(comp) })), } } @@ -900,7 +896,7 @@ impl<'a> Iterator for Components<'a> { let raw = &self.path[..self.prefix_len()]; self.path = &self.path[self.prefix_len()..]; return Some(Component::Prefix(PrefixComponent { - raw: unsafe { u8_slice_as_os_str(raw) }, + raw: unsafe { OsStr::from_os_str_bytes_unchecked(raw) }, parsed: self.prefix.unwrap(), })); } @@ -972,7 +968,7 @@ impl<'a> DoubleEndedIterator for Components<'a> { State::Prefix if self.prefix_len() > 0 => { self.back = State::Done; return Some(Component::Prefix(PrefixComponent { - raw: unsafe { u8_slice_as_os_str(self.path) }, + raw: unsafe { OsStr::from_os_str_bytes_unchecked(self.path) }, parsed: self.prefix.unwrap(), })); } @@ -1481,17 +1477,17 @@ impl PathBuf { fn _set_extension(&mut self, extension: &OsStr) -> bool { let file_stem = match self.file_stem() { None => return false, - Some(f) => f.bytes(), + Some(f) => f.as_os_str_bytes(), }; // truncate until right after the file stem let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr(); - let start = self.inner.bytes().as_ptr().addr(); + let start = self.inner.as_os_str_bytes().as_ptr().addr(); let v = self.as_mut_vec(); v.truncate(end_file_stem.wrapping_sub(start)); // add the new extension, if any - let new = extension.bytes(); + let new = extension.as_os_str_bytes(); if !new.is_empty() { v.reserve_exact(new.len() + 1); v.push(b'.'); @@ -2011,11 +2007,11 @@ impl Path { // The following (private!) function allows construction of a path from a u8 // slice, which is only safe when it is known to follow the OsStr encoding. unsafe fn from_u8_slice(s: &[u8]) -> &Path { - unsafe { Path::new(u8_slice_as_os_str(s)) } + unsafe { Path::new(OsStr::from_os_str_bytes_unchecked(s)) } } // The following (private!) function reveals the byte encoding used for OsStr. fn as_u8_slice(&self) -> &[u8] { - self.inner.bytes() + self.inner.as_os_str_bytes() } /// Directly wraps a string slice as a `Path` slice. diff --git a/library/std/src/sys/common/small_c_string.rs b/library/std/src/sys/common/small_c_string.rs index 01acd519135..963d17a47e4 100644 --- a/library/std/src/sys/common/small_c_string.rs +++ b/library/std/src/sys/common/small_c_string.rs @@ -19,7 +19,7 @@ pub fn run_path_with_cstr<T, F>(path: &Path, f: F) -> io::Result<T> where F: FnOnce(&CStr) -> io::Result<T>, { - run_with_cstr(path.as_os_str().bytes(), f) + run_with_cstr(path.as_os_str().as_os_str_bytes(), f) } #[inline] diff --git a/library/std/src/sys/common/tests.rs b/library/std/src/sys/common/tests.rs index fb6f5d6af83..0a1cbcbe8ef 100644 --- a/library/std/src/sys/common/tests.rs +++ b/library/std/src/sys/common/tests.rs @@ -8,7 +8,7 @@ use core::iter::repeat; fn stack_allocation_works() { let path = Path::new("abc"); let result = run_path_with_cstr(path, |p| { - assert_eq!(p, &*CString::new(path.as_os_str().bytes()).unwrap()); + assert_eq!(p, &*CString::new(path.as_os_str().as_os_str_bytes()).unwrap()); Ok(42) }); assert_eq!(result.unwrap(), 42); @@ -25,7 +25,7 @@ fn heap_allocation_works() { let path = repeat("a").take(384).collect::<String>(); let path = Path::new(&path); let result = run_path_with_cstr(path, |p| { - assert_eq!(p, &*CString::new(path.as_os_str().bytes()).unwrap()); + assert_eq!(p, &*CString::new(path.as_os_str().as_os_str_bytes()).unwrap()); Ok(42) }); assert_eq!(result.unwrap(), 42); diff --git a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs index 488217f3941..142fcb9ed0b 100644 --- a/library/std/src/sys/unix/os_str.rs +++ b/library/std/src/sys/unix/os_str.rs @@ -193,13 +193,18 @@ impl Buf { impl Slice { #[inline] - fn from_u8_slice(s: &[u8]) -> &Slice { + pub fn as_os_str_bytes(&self) -> &[u8] { + &self.inner + } + + #[inline] + pub unsafe fn from_os_str_bytes_unchecked(s: &[u8]) -> &Slice { unsafe { mem::transmute(s) } } #[inline] pub fn from_str(s: &str) -> &Slice { - Slice::from_u8_slice(s.as_bytes()) + unsafe { Slice::from_os_str_bytes_unchecked(s.as_bytes()) } } pub fn to_str(&self) -> Option<&str> { diff --git a/library/std/src/sys/unix/os_str/tests.rs b/library/std/src/sys/unix/os_str/tests.rs index 22ba0c92350..91bc0e61a4a 100644 --- a/library/std/src/sys/unix/os_str/tests.rs +++ b/library/std/src/sys/unix/os_str/tests.rs @@ -2,7 +2,7 @@ use super::*; #[test] fn slice_debug_output() { - let input = Slice::from_u8_slice(b"\xF0hello,\tworld"); + let input = unsafe { Slice::from_os_str_bytes_unchecked(b"\xF0hello,\tworld") }; let expected = r#""\xF0hello,\tworld""#; let output = format!("{input:?}"); @@ -11,8 +11,7 @@ fn slice_debug_output() { #[test] fn display() { - assert_eq!( - "Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye", - Slice::from_u8_slice(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string(), - ); + assert_eq!("Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye", unsafe { + Slice::from_os_str_bytes_unchecked(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string() + },); } diff --git a/library/std/src/sys/unix/path.rs b/library/std/src/sys/unix/path.rs index a98a69e2db8..935245f637b 100644 --- a/library/std/src/sys/unix/path.rs +++ b/library/std/src/sys/unix/path.rs @@ -30,7 +30,7 @@ pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> { // Get the components, skipping the redundant leading "." component if it exists. let mut components = path.strip_prefix(".").unwrap_or(path).components(); - let path_os = path.as_os_str().bytes(); + let path_os = path.as_os_str().as_os_str_bytes(); let mut normalized = if path.is_absolute() { // "If a pathname begins with two successive <slash> characters, the diff --git a/library/std/src/sys/unix/process/process_common.rs b/library/std/src/sys/unix/process/process_common.rs index afd03d79c0b..640648e8707 100644 --- a/library/std/src/sys/unix/process/process_common.rs +++ b/library/std/src/sys/unix/process/process_common.rs @@ -164,9 +164,9 @@ pub enum ProgramKind { impl ProgramKind { fn new(program: &OsStr) -> Self { - if program.bytes().starts_with(b"/") { + if program.as_os_str_bytes().starts_with(b"/") { Self::Absolute - } else if program.bytes().contains(&b'/') { + } else if program.as_os_str_bytes().contains(&b'/') { // If the program has more than one component in it, it is a relative path. Self::Relative } else { diff --git a/library/std/src/sys/wasi/fd.rs b/library/std/src/sys/wasi/fd.rs index 9a8b2a0be5b..1b50c2ea6dd 100644 --- a/library/std/src/sys/wasi/fd.rs +++ b/library/std/src/sys/wasi/fd.rs @@ -96,7 +96,7 @@ impl WasiFd { unsafe { wasi::fd_sync(self.as_raw_fd() as wasi::Fd).map_err(err2io) } } - pub fn advise(&self, offset: u64, len: u64, advice: wasi::Advice) -> io::Result<()> { + pub(crate) fn advise(&self, offset: u64, len: u64, advice: wasi::Advice) -> io::Result<()> { unsafe { wasi::fd_advise(self.as_raw_fd() as wasi::Fd, offset, len, advice).map_err(err2io) } @@ -179,7 +179,7 @@ impl WasiFd { } } - pub fn filestat_get(&self) -> io::Result<wasi::Filestat> { + pub(crate) fn filestat_get(&self) -> io::Result<wasi::Filestat> { unsafe { wasi::fd_filestat_get(self.as_raw_fd() as wasi::Fd).map_err(err2io) } } @@ -199,7 +199,7 @@ impl WasiFd { unsafe { wasi::fd_filestat_set_size(self.as_raw_fd() as wasi::Fd, size).map_err(err2io) } } - pub fn path_filestat_get( + pub(crate) fn path_filestat_get( &self, flags: wasi::Lookupflags, path: &str, diff --git a/library/std/src/sys/wasi/fs.rs b/library/std/src/sys/wasi/fs.rs index 8d1dbf59155..437aae3ae7f 100644 --- a/library/std/src/sys/wasi/fs.rs +++ b/library/std/src/sys/wasi/fs.rs @@ -104,7 +104,7 @@ impl FileAttr { Ok(SystemTime::from_wasi_timestamp(self.meta.ctim)) } - pub fn as_wasi(&self) -> &wasi::Filestat { + pub(crate) fn as_wasi(&self) -> &wasi::Filestat { &self.meta } } @@ -142,7 +142,7 @@ impl FileType { self.bits == wasi::FILETYPE_SYMBOLIC_LINK } - pub fn bits(&self) -> wasi::Filetype { + pub(crate) fn bits(&self) -> wasi::Filetype { self.bits } } diff --git a/library/std/src/sys/windows/args.rs b/library/std/src/sys/windows/args.rs index 5bfd8b52ed0..6b597f499bc 100644 --- a/library/std/src/sys/windows/args.rs +++ b/library/std/src/sys/windows/args.rs @@ -226,7 +226,7 @@ pub(crate) fn append_arg(cmd: &mut Vec<u16>, arg: &Arg, force_quotes: bool) -> i // that it actually gets passed through on the command line or otherwise // it will be dropped entirely when parsed on the other end. ensure_no_nuls(arg)?; - let arg_bytes = arg.bytes(); + let arg_bytes = arg.as_os_str_bytes(); let (quote, escape) = match quote { Quote::Always => (true, true), Quote::Auto => { @@ -297,7 +297,9 @@ pub(crate) fn make_bat_command_line( // * `|<>` pipe/redirect characters. const SPECIAL: &[u8] = b"\t &()[]{}^=;!'+,`~%|<>"; let force_quotes = match arg { - Arg::Regular(arg) if !force_quotes => arg.bytes().iter().any(|c| SPECIAL.contains(c)), + Arg::Regular(arg) if !force_quotes => { + arg.as_os_str_bytes().iter().any(|c| SPECIAL.contains(c)) + } _ => force_quotes, }; append_arg(&mut cmd, arg, force_quotes)?; diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs index 2f2b0e56e08..611f0d040f0 100644 --- a/library/std/src/sys/windows/os_str.rs +++ b/library/std/src/sys/windows/os_str.rs @@ -152,6 +152,16 @@ impl Buf { impl Slice { #[inline] + pub fn as_os_str_bytes(&self) -> &[u8] { + self.inner.as_bytes() + } + + #[inline] + pub unsafe fn from_os_str_bytes_unchecked(s: &[u8]) -> &Slice { + mem::transmute(Wtf8::from_bytes_unchecked(s)) + } + + #[inline] pub fn from_str(s: &str) -> &Slice { unsafe { mem::transmute(Wtf8::from_str(s)) } } diff --git a/library/std/src/sys/windows/path.rs b/library/std/src/sys/windows/path.rs index c3573d14c7f..c9c2d10e6c4 100644 --- a/library/std/src/sys/windows/path.rs +++ b/library/std/src/sys/windows/path.rs @@ -1,7 +1,6 @@ use super::{c, fill_utf16_buf, to_u16s}; use crate::ffi::{OsStr, OsString}; use crate::io; -use crate::mem; use crate::path::{Path, PathBuf, Prefix}; use crate::ptr; @@ -11,16 +10,6 @@ mod tests; pub const MAIN_SEP_STR: &str = "\\"; pub const MAIN_SEP: char = '\\'; -/// # Safety -/// -/// `bytes` must be a valid wtf8 encoded slice -#[inline] -unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr { - // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8, - // which is compatible with &[u8]. - mem::transmute(bytes) -} - #[inline] pub fn is_sep_byte(b: u8) -> bool { b == b'/' || b == b'\\' @@ -33,12 +22,12 @@ pub fn is_verbatim_sep(b: u8) -> bool { /// Returns true if `path` looks like a lone filename. pub(crate) fn is_file_name(path: &OsStr) -> bool { - !path.bytes().iter().copied().any(is_sep_byte) + !path.as_os_str_bytes().iter().copied().any(is_sep_byte) } pub(crate) fn has_trailing_slash(path: &OsStr) -> bool { - let is_verbatim = path.bytes().starts_with(br"\\?\"); + let is_verbatim = path.as_os_str_bytes().starts_with(br"\\?\"); let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte }; - if let Some(&c) = path.bytes().last() { is_separator(c) } else { false } + if let Some(&c) = path.as_os_str_bytes().last() { is_separator(c) } else { false } } /// Appends a suffix to a path. @@ -60,7 +49,7 @@ impl<'a, const LEN: usize> PrefixParser<'a, LEN> { fn get_prefix(path: &OsStr) -> [u8; LEN] { let mut prefix = [0; LEN]; // SAFETY: Only ASCII characters are modified. - for (i, &ch) in path.bytes().iter().take(LEN).enumerate() { + for (i, &ch) in path.as_os_str_bytes().iter().take(LEN).enumerate() { prefix[i] = if ch == b'/' { b'\\' } else { ch }; } prefix @@ -93,7 +82,7 @@ impl<'a> PrefixParserSlice<'a, '_> { } fn prefix_bytes(&self) -> &'a [u8] { - &self.path.bytes()[..self.index] + &self.path.as_os_str_bytes()[..self.index] } fn finish(self) -> &'a OsStr { @@ -101,7 +90,7 @@ impl<'a> PrefixParserSlice<'a, '_> { // &[u8] and back. This is safe to do because (1) we only look at ASCII // contents of the encoding and (2) new &OsStr values are produced only // from ASCII-bounded slices of existing &OsStr values. - unsafe { bytes_as_os_str(&self.path.bytes()[self.index..]) } + unsafe { OsStr::from_os_str_bytes_unchecked(&self.path.as_os_str_bytes()[self.index..]) } } } @@ -173,7 +162,7 @@ fn parse_drive(path: &OsStr) -> Option<u8> { drive.is_ascii_alphabetic() } - match path.bytes() { + match path.as_os_str_bytes() { [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), _ => None, } @@ -182,7 +171,7 @@ fn parse_drive(path: &OsStr) -> Option<u8> { // Parses a drive prefix exactly, e.g. "C:" fn parse_drive_exact(path: &OsStr) -> Option<u8> { // only parse two bytes: the drive letter and the drive separator - if path.bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { + if path.as_os_str_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { parse_drive(path) } else { None @@ -196,21 +185,26 @@ fn parse_drive_exact(path: &OsStr) -> Option<u8> { fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; - match path.bytes().iter().position(|&x| separator(x)) { + match path.as_os_str_bytes().iter().position(|&x| separator(x)) { Some(separator_start) => { let separator_end = separator_start + 1; - let component = &path.bytes()[..separator_start]; + let component = &path.as_os_str_bytes()[..separator_start]; // Panic safe // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. - let path = &path.bytes()[separator_end..]; + let path = &path.as_os_str_bytes()[separator_end..]; // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') // is encoded in a single byte, therefore `bytes[separator_start]` and // `bytes[separator_end]` must be code point boundaries and thus // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. - unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) } + unsafe { + ( + OsStr::from_os_str_bytes_unchecked(component), + OsStr::from_os_str_bytes_unchecked(path), + ) + } } None => (path, OsStr::new("")), } @@ -329,7 +323,7 @@ pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> { // Verbatim paths should not be modified. if prefix.map(|x| x.is_verbatim()).unwrap_or(false) { // NULs in verbatim paths are rejected for consistency. - if path.bytes().contains(&0) { + if path.as_os_str_bytes().contains(&0) { return Err(io::const_io_error!( io::ErrorKind::InvalidInput, "strings passed to WinAPI cannot contain NULs", diff --git a/library/std/src/sys/windows/process.rs b/library/std/src/sys/windows/process.rs index df3667c0fd7..a573a05c39c 100644 --- a/library/std/src/sys/windows/process.rs +++ b/library/std/src/sys/windows/process.rs @@ -395,7 +395,7 @@ fn resolve_exe<'a>( // Test if the file name has the `exe` extension. // This does a case-insensitive `ends_with`. let has_exe_suffix = if exe_path.len() >= EXE_SUFFIX.len() { - exe_path.bytes()[exe_path.len() - EXE_SUFFIX.len()..] + exe_path.as_os_str_bytes()[exe_path.len() - EXE_SUFFIX.len()..] .eq_ignore_ascii_case(EXE_SUFFIX.as_bytes()) } else { false @@ -425,7 +425,7 @@ fn resolve_exe<'a>( // From the `CreateProcessW` docs: // > If the file name does not contain an extension, .exe is appended. // Note that this rule only applies when searching paths. - let has_extension = exe_path.bytes().contains(&b'.'); + let has_extension = exe_path.as_os_str_bytes().contains(&b'.'); // Search the directories given by `search_paths`. let result = search_paths(parent_paths, child_paths, |mut path| { diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index ff96c35fb0b..31bb0ad25a6 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -570,7 +570,7 @@ impl Wtf8 { /// Since the byte slice is not checked for valid WTF-8, this functions is /// marked unsafe. #[inline] - unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 { + pub unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 { mem::transmute(value) } @@ -614,6 +614,12 @@ impl Wtf8 { Wtf8CodePoints { bytes: self.bytes.iter() } } + /// Access raw bytes of WTF-8 data + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.bytes + } + /// Tries to convert the string to UTF-8 and return a `&str` slice. /// /// Returns `None` if the string contains surrogates. |
