about summary refs log tree commit diff
path: root/library/std/src
diff options
context:
space:
mode:
author Stuart Cook <Zalathar@users.noreply.github.com> 2025-05-02 22:17:00 +1000
committer GitHub <noreply@github.com> 2025-05-02 22:17:00 +1000
commit 5a58c7a6ab060044399f7886cff330f3ee74c21c (patch)
tree e1204106d91d881b6aed3e66ddeeb2870824edde /library/std/src
parent 6fc78d410d506e3c95c6362046f3965810f5ec6d (diff)
parent 0f0c0d8b16b265ac57cac9fd50f1dcfe78719a6a (diff)
download rust-5a58c7a6ab060044399f7886cff330f3ee74c21c.tar.gz
download rust-5a58c7a6ab060044399f7886cff330f3ee74c21c.zip
Rollup merge of #140159 - thaliaarchi:pathbuf-extension, r=workingjubilee
Avoid redundant WTF-8 checks in `PathBuf`

Eliminate checks for WTF-8 boundaries in `PathBuf::set_extension` and `add_extension`, where joining WTF-8 surrogate halves is impossible. Don't convert the `str` to `OsStr`, because `OsString::push` specializes to skip the joining when given strings.

To assist in this, mark the internal methods `OsString::truncate` and `extend_from_slice` as `unsafe` to communicate their safety invariants better than with module privacy.

Similar to #137777.

cc `@joboet` `@ChrisDenton`
Diffstat (limited to 'library/std/src')
-rw-r--r-- library/std/src/ffi/os_str.rs 22
-rw-r--r-- library/std/src/path.rs 19
-rw-r--r-- library/std/src/sys/os_str/bytes.rs 25
-rw-r--r-- library/std/src/sys/os_str/wtf8.rs 30
4 files changed, 65 insertions, 31 deletions
diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
index ce01175309a..72bdf03ee61 100644
--- a/library/std/src/ffi/os_str.rs
+++ b/library/std/src/ffi/os_str.rs
@@ -582,15 +582,25 @@ impl OsString {
     #[unstable(feature = "os_string_truncate", issue = "133262")]
     pub fn truncate(&mut self, len: usize) {
         self.as_os_str().inner.check_public_boundary(len);
-        self.inner.truncate(len);
+        // SAFETY: The length was just checked to be at a valid boundary.
+        unsafe { self.inner.truncate_unchecked(len) };
     }
 
-    /// Provides plumbing to core `Vec::extend_from_slice`.
-    /// More well behaving alternative to allowing outer types
-    /// full mutable access to the core `Vec`.
+    /// Provides plumbing to `Vec::extend_from_slice` without giving full
+    /// mutable access to the `Vec`.
+    ///
+    /// # Safety
+    ///
+    /// The slice must be valid for the platform encoding (as described in
+    /// [`OsStr::from_encoded_bytes_unchecked`]).
+    ///
+    /// This bypasses the encoding-dependent surrogate joining, so `self` must
+    /// not end with a leading surrogate half and `other` must not start with
+    /// a trailing surrogate half.
     #[inline]
-    pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
-        self.inner.extend_from_slice(other);
+    pub(crate) unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) {
+        // SAFETY: Guaranteed by caller.
+        unsafe { self.inner.extend_from_slice_unchecked(other) };
     }
 }
 
diff --git a/library/std/src/path.rs b/library/std/src/path.rs
index 7cd20c48d89..1a4a7aa7448 100644
--- a/library/std/src/path.rs
+++ b/library/std/src/path.rs
@@ -1529,11 +1529,13 @@ impl PathBuf {
         self.inner.truncate(end_file_stem.wrapping_sub(start));
 
         // add the new extension, if any
-        let new = extension;
+        let new = extension.as_encoded_bytes();
         if !new.is_empty() {
             self.inner.reserve_exact(new.len() + 1);
-            self.inner.push(OsStr::new("."));
-            self.inner.push(new);
+            self.inner.push(".");
+            // SAFETY: Since a UTF-8 string was just pushed, it is not possible
+            // for the buffer to end with a surrogate half.
+            unsafe { self.inner.extend_from_slice_unchecked(new) };
         }
 
         true
@@ -1597,7 +1599,7 @@ impl PathBuf {
             Some(f) => f.as_encoded_bytes(),
         };
 
-        let new = extension;
+        let new = extension.as_encoded_bytes();
         if !new.is_empty() {
             // truncate until right after the file name
             // this is necessary for trimming the trailing slash
@@ -1607,8 +1609,10 @@ impl PathBuf {
 
             // append the new extension
             self.inner.reserve_exact(new.len() + 1);
-            self.inner.push(OsStr::new("."));
-            self.inner.push(new);
+            self.inner.push(".");
+            // SAFETY: Since a UTF-8 string was just pushed, it is not possible
+            // for the buffer to end with a surrogate half.
+            unsafe { self.inner.extend_from_slice_unchecked(new) };
         }
 
         true
@@ -2769,7 +2773,8 @@ impl Path {
         };
 
         let mut new_path = PathBuf::with_capacity(new_capacity);
-        new_path.inner.extend_from_slice(slice_to_copy);
+        // SAFETY: The path is empty, so cannot have surrogate halves.
+        unsafe { new_path.inner.extend_from_slice_unchecked(slice_to_copy) };
         new_path.set_extension(extension);
         new_path
     }
diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs
index dfff2d3e5d3..4a8808c9230 100644
--- a/library/std/src/sys/os_str/bytes.rs
+++ b/library/std/src/sys/os_str/bytes.rs
@@ -216,19 +216,26 @@ impl Buf {
         self.as_slice().into_rc()
     }
 
-    /// Provides plumbing to core `Vec::truncate`.
-    /// More well behaving alternative to allowing outer types
-    /// full mutable access to the core `Vec`.
-    #[inline]
-    pub(crate) fn truncate(&mut self, len: usize) {
+    /// Provides plumbing to `Vec::truncate` without giving full mutable access
+    /// to the `Vec`.
+    ///
+    /// # Safety
+    ///
+    /// The length must be at an `OsStr` boundary, according to
+    /// `Slice::check_public_boundary`.
+    #[inline]
+    pub unsafe fn truncate_unchecked(&mut self, len: usize) {
         self.inner.truncate(len);
     }
 
-    /// Provides plumbing to core `Vec::extend_from_slice`.
-    /// More well behaving alternative to allowing outer types
-    /// full mutable access to the core `Vec`.
+    /// Provides plumbing to `Vec::extend_from_slice` without giving full
+    /// mutable access to the `Vec`.
+    ///
+    /// # Safety
+    ///
+    /// This encoding has no safety requirements.
     #[inline]
-    pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
+    pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) {
         self.inner.extend_from_slice(other);
     }
 }
diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs
index a32f5d40f6a..5174ea65d0c 100644
--- a/library/std/src/sys/os_str/wtf8.rs
+++ b/library/std/src/sys/os_str/wtf8.rs
@@ -195,19 +195,31 @@ impl Buf {
         self.as_slice().into_rc()
     }
 
-    /// Provides plumbing to core `Vec::truncate`.
-    /// More well behaving alternative to allowing outer types
-    /// full mutable access to the core `Vec`.
-    #[inline]
-    pub(crate) fn truncate(&mut self, len: usize) {
+    /// Provides plumbing to `Vec::truncate` without giving full mutable access
+    /// to the `Vec`.
+    ///
+    /// # Safety
+    ///
+    /// The length must be at an `OsStr` boundary, according to
+    /// `Slice::check_public_boundary`.
+    #[inline]
+    pub unsafe fn truncate_unchecked(&mut self, len: usize) {
         self.inner.truncate(len);
     }
 
-    /// Provides plumbing to core `Vec::extend_from_slice`.
-    /// More well behaving alternative to allowing outer types
-    /// full mutable access to the core `Vec`.
+    /// Provides plumbing to `Vec::extend_from_slice` without giving full
+    /// mutable access to the `Vec`.
+    ///
+    /// # Safety
+    ///
+    /// The slice must be valid for the platform encoding (as described in
+    /// [`Slice::from_encoded_bytes_unchecked`]).
+    ///
+    /// This bypasses the WTF-8 surrogate joining, so `self` must not end with a
+    /// leading surrogate half and `other` must not start with a trailing
+    /// surrogate half.
     #[inline]
-    pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
+    pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) {
         self.inner.extend_from_slice(other);
     }
 }