1 files changed, 43 insertions, 0 deletions
diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs
index 3a75ce9ebb7..4ca3f1cd185 100644
--- a/library/std/src/sys/os_str/bytes.rs
+++ b/library/std/src/sys/os_str/bytes.rs
@@ -211,6 +211,49 @@ impl Slice {
         unsafe { mem::transmute(s) }
     }
 
+    #[track_caller]
+    #[inline]
+    pub fn check_public_boundary(&self, index: usize) {
+        if index == 0 || index == self.inner.len() {
+            return;
+        }
+        if index < self.inner.len()
+            && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii())
+        {
+            return;
+        }
+
+        slow_path(&self.inner, index);
+
+        /// We're betting that typical splits will involve an ASCII character.
+        ///
+        /// Putting the expensive checks in a separate function generates notably
+        /// better assembly.
+        #[track_caller]
+        #[inline(never)]
+        fn slow_path(bytes: &[u8], index: usize) {
+            let (before, after) = bytes.split_at(index);
+
+            // UTF-8 takes at most 4 bytes per codepoint, so we don't
+            // need to check more than that.
+            let after = after.get(..4).unwrap_or(after);
+            match str::from_utf8(after) {
+                Ok(_) => return,
+                Err(err) if err.valid_up_to() != 0 => return,
+                Err(_) => (),
+            }
+
+            for len in 2..=4.min(index) {
+                let before = &before[index - len..];
+                if str::from_utf8(before).is_ok() {
+                    return;
+                }
+            }
+
+            panic!("byte index {index} is not an OsStr boundary");
+        }
+    }
+
     #[inline]
     pub fn from_str(s: &str) -> &Slice {
         unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) }