diff options
| author | Stuart Cook <Zalathar@users.noreply.github.com> | 2025-05-02 22:17:00 +1000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-02 22:17:00 +1000 |
| commit | 5a58c7a6ab060044399f7886cff330f3ee74c21c (patch) | |
| tree | e1204106d91d881b6aed3e66ddeeb2870824edde /library/std/src | |
| parent | 6fc78d410d506e3c95c6362046f3965810f5ec6d (diff) | |
| parent | 0f0c0d8b16b265ac57cac9fd50f1dcfe78719a6a (diff) | |
| download | rust-5a58c7a6ab060044399f7886cff330f3ee74c21c.tar.gz rust-5a58c7a6ab060044399f7886cff330f3ee74c21c.zip | |
Rollup merge of #140159 - thaliaarchi:pathbuf-extension, r=workingjubilee
Avoid redundant WTF-8 checks in `PathBuf` Eliminate checks for WTF-8 boundaries in `PathBuf::set_extension` and `add_extension`, where joining WTF-8 surrogate halves is impossible. Don't convert the `str` to `OsStr`, because `OsString::push` specializes to skip the joining when given strings. To assist in this, mark the internal methods `OsString::truncate` and `extend_from_slice` as `unsafe` to communicate their safety invariants better than with module privacy. Similar to #137777. cc `@joboet` `@ChrisDenton`
Diffstat (limited to 'library/std/src')
| -rw-r--r-- | library/std/src/ffi/os_str.rs | 22 | ||||
| -rw-r--r-- | library/std/src/path.rs | 19 | ||||
| -rw-r--r-- | library/std/src/sys/os_str/bytes.rs | 25 | ||||
| -rw-r--r-- | library/std/src/sys/os_str/wtf8.rs | 30 |
4 files changed, 65 insertions, 31 deletions
diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index ce01175309a..72bdf03ee61 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -582,15 +582,25 @@ impl OsString { #[unstable(feature = "os_string_truncate", issue = "133262")] pub fn truncate(&mut self, len: usize) { self.as_os_str().inner.check_public_boundary(len); - self.inner.truncate(len); + // SAFETY: The length was just checked to be at a valid boundary. + unsafe { self.inner.truncate_unchecked(len) }; } - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. + /// Provides plumbing to `Vec::extend_from_slice` without giving full + /// mutable access to the `Vec`. + /// + /// # Safety + /// + /// The slice must be valid for the platform encoding (as described in + /// [`OsStr::from_encoded_bytes_unchecked`]). + /// + /// This bypasses the encoding-dependent surrogate joining, so `self` must + /// not end with a leading surrogate half and `other` must not start with + /// with a trailing surrogate half. #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { - self.inner.extend_from_slice(other); + pub(crate) unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) { + // SAFETY: Guaranteed by caller. + unsafe { self.inner.extend_from_slice_unchecked(other) }; } } diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 7cd20c48d89..1a4a7aa7448 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -1529,11 +1529,13 @@ impl PathBuf { self.inner.truncate(end_file_stem.wrapping_sub(start)); // add the new extension, if any - let new = extension; + let new = extension.as_encoded_bytes(); if !new.is_empty() { self.inner.reserve_exact(new.len() + 1); - self.inner.push(OsStr::new(".")); - self.inner.push(new); + self.inner.push("."); + // SAFETY: Since a UTF-8 string was just pushed, it is not possible + // for the buffer to end with a surrogate half. + unsafe { self.inner.extend_from_slice_unchecked(new) }; } true @@ -1597,7 +1599,7 @@ impl PathBuf { Some(f) => f.as_encoded_bytes(), }; - let new = extension; + let new = extension.as_encoded_bytes(); if !new.is_empty() { // truncate until right after the file name // this is necessary for trimming the trailing slash @@ -1607,8 +1609,10 @@ impl PathBuf { // append the new extension self.inner.reserve_exact(new.len() + 1); - self.inner.push(OsStr::new(".")); - self.inner.push(new); + self.inner.push("."); + // SAFETY: Since a UTF-8 string was just pushed, it is not possible + // for the buffer to end with a surrogate half. + unsafe { self.inner.extend_from_slice_unchecked(new) }; } true @@ -2769,7 +2773,8 @@ impl Path { }; let mut new_path = PathBuf::with_capacity(new_capacity); - new_path.inner.extend_from_slice(slice_to_copy); + // SAFETY: The path is empty, so cannot have surrogate halves. + unsafe { new_path.inner.extend_from_slice_unchecked(slice_to_copy) }; new_path.set_extension(extension); new_path } diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs index dfff2d3e5d3..4a8808c9230 100644 --- a/library/std/src/sys/os_str/bytes.rs +++ b/library/std/src/sys/os_str/bytes.rs @@ -216,19 +216,26 @@ impl Buf { self.as_slice().into_rc() } - /// Provides plumbing to core `Vec::truncate`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn truncate(&mut self, len: usize) { + /// Provides plumbing to `Vec::truncate` without giving full mutable access + /// to the `Vec`. + /// + /// # Safety + /// + /// The length must be at an `OsStr` boundary, according to + /// `Slice::check_public_boundary`. + #[inline] + pub unsafe fn truncate_unchecked(&mut self, len: usize) { self.inner.truncate(len); } - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. + /// Provides plumbing to `Vec::extend_from_slice` without giving full + /// mutable access to the `Vec`. + /// + /// # Safety + /// + /// This encoding has no safety requirements. #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) { self.inner.extend_from_slice(other); } } diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs index a32f5d40f6a..5174ea65d0c 100644 --- a/library/std/src/sys/os_str/wtf8.rs +++ b/library/std/src/sys/os_str/wtf8.rs @@ -195,19 +195,31 @@ impl Buf { self.as_slice().into_rc() } - /// Provides plumbing to core `Vec::truncate`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn truncate(&mut self, len: usize) { + /// Provides plumbing to `Vec::truncate` without giving full mutable access + /// to the `Vec`. + /// + /// # Safety + /// + /// The length must be at an `OsStr` boundary, according to + /// `Slice::check_public_boundary`. + #[inline] + pub unsafe fn truncate_unchecked(&mut self, len: usize) { self.inner.truncate(len); } - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. + /// Provides plumbing to `Vec::extend_from_slice` without giving full + /// mutable access to the `Vec`. + /// + /// # Safety + /// + /// The slice must be valid for the platform encoding (as described in + /// [`Slice::from_encoded_bytes_unchecked`]). + /// + /// This bypasses the WTF-8 surrogate joining, so `self` must not end with a + /// leading surrogate half and `other` must not start with with a trailing + /// surrogate half. #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) { self.inner.extend_from_slice(other); } } |
