about summary refs log tree commit diff
path: root/library/std/src
diff options
context:
space:
mode:
Diffstat (limited to 'library/std/src')
-rw-r--r-- library/std/src/ffi/os_str.rs 82
-rw-r--r-- library/std/src/ffi/os_str/tests.rs 50
-rw-r--r-- library/std/src/lib.rs 5
3 files changed, 134 insertions, 3 deletions
diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
index fa9d48771b6..81973182148 100644
--- a/library/std/src/ffi/os_str.rs
+++ b/library/std/src/ffi/os_str.rs
@@ -6,9 +6,10 @@ use crate::cmp;
 use crate::collections::TryReserveError;
 use crate::fmt;
 use crate::hash::{Hash, Hasher};
-use crate::ops;
+use crate::ops::{self, Range};
 use crate::rc::Rc;
-use crate::str::FromStr;
+use crate::slice;
+use crate::str::{from_utf8 as str_from_utf8, FromStr};
 use crate::sync::Arc;
 
 use crate::sys::os_str::{Buf, Slice};
@@ -963,6 +964,83 @@ impl OsStr {
         self.inner.as_encoded_bytes()
     }
 
+    /// Takes a substring based on a range of byte offsets into the slice returned by
+    /// [`OsStr::as_encoded_bytes`].
+    ///
+    /// The range's start and end must lie on valid `OsStr` boundaries.
+    /// A valid `OsStr` boundary is one of:
+    /// - The start of the string
+    /// - The end of the string
+    /// - Immediately before a valid non-empty UTF-8 substring
+    /// - Immediately after a valid non-empty UTF-8 substring
+    ///
+    /// # Panics
+    ///
+    /// Panics if `range` does not lie on valid `OsStr` boundaries, if its start is greater
+    /// than its end, or if it exceeds the end of the string.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(os_str_slice)]
+    ///
+    /// use std::ffi::OsStr;
+    ///
+    /// let os_str = OsStr::new("foo=bar");
+    /// let bytes = os_str.as_encoded_bytes();
+    /// if let Some(index) = bytes.iter().position(|b| *b == b'=') {
+    ///     let key = os_str.slice_encoded_bytes(..index);
+    ///     let value = os_str.slice_encoded_bytes(index + 1..);
+    ///     assert_eq!(key, "foo");
+    ///     assert_eq!(value, "bar");
+    /// }
+    /// ```
+    #[unstable(feature = "os_str_slice", issue = "118485")]
+    pub fn slice_encoded_bytes<R: ops::RangeBounds<usize>>(&self, range: R) -> &Self {
+        #[track_caller]
+        fn check_valid_boundary(bytes: &[u8], index: usize) {
+            if index == 0 || index == bytes.len() {
+                return;
+            }
+
+            // Fast path: any ASCII byte is a valid one-byte UTF-8 substring
+            if bytes[index - 1].is_ascii() || bytes[index].is_ascii() {
+                return;
+            }
+
+            let (before, after) = bytes.split_at(index);
+
+            // UTF-8 takes at most 4 bytes per codepoint, so a 4-byte window is enough to
+            // tell whether a valid character starts at `index`.
+            let after = after.get(..4).unwrap_or(after);
+            match str_from_utf8(after) {
+                Ok(_) => return, // the whole window is valid UTF-8
+                Err(err) if err.valid_up_to() != 0 => return, // valid UTF-8 starts at `index`
+                Err(_) => (), // nothing valid starts here; look backwards instead
+            }
+
+            for len in 2..=4.min(index) { // a one-byte (ASCII) tail was the fast path
+                let before = &before[index - len..];
+                if str_from_utf8(before).is_ok() {
+                    return;
+                }
+            }
+
+            panic!("byte index {index} is not an OsStr boundary");
+        }
+
+        let encoded_bytes = self.as_encoded_bytes();
+        let Range { start, end } = slice::range(range, ..encoded_bytes.len());
+        check_valid_boundary(encoded_bytes, start);
+        check_valid_boundary(encoded_bytes, end);
+
+        // SAFETY: `slice::range` panics unless `start <= end <= encoded_bytes.len()`
+        let slice = unsafe { encoded_bytes.get_unchecked(start..end) };
+
+        // SAFETY: `slice` comes from `self` and both ends are checked `OsStr` boundaries
+        unsafe { Self::from_encoded_bytes_unchecked(slice) }
+    }
+
     /// Converts this string to its ASCII lower case equivalent in-place.
     ///
     /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
diff --git a/library/std/src/ffi/os_str/tests.rs b/library/std/src/ffi/os_str/tests.rs
index d7926749aae..2765398d3e6 100644
--- a/library/std/src/ffi/os_str/tests.rs
+++ b/library/std/src/ffi/os_str/tests.rs
@@ -177,3 +177,53 @@ fn into_rc() {
     assert_eq!(&*rc2, os_str);
     assert_eq!(&*arc2, os_str);
 }
+
+#[test]
+fn slice_encoded_bytes() {
+    let os_str = OsStr::new("123θგ🦀");
+    // ASCII: one byte per character, bytes 0..3
+    let digits = os_str.slice_encoded_bytes(..3);
+    assert_eq!(digits, "123");
+    let three = os_str.slice_encoded_bytes(2..3);
+    assert_eq!(three, "3");
+    // 2-byte UTF-8: bytes 3..5
+    let theta = os_str.slice_encoded_bytes(3..5);
+    assert_eq!(theta, "θ");
+    // 3-byte UTF-8: bytes 5..8
+    let gani = os_str.slice_encoded_bytes(5..8);
+    assert_eq!(gani, "გ");
+    // 4-byte UTF-8: bytes 8 to the end
+    let crab = os_str.slice_encoded_bytes(8..);
+    assert_eq!(crab, "🦀");
+}
+
+#[test]
+#[should_panic(expected = "byte index 2 is not an OsStr boundary")]
+fn slice_mid_char() {
+    let crab = OsStr::new("🦀"); // a single 4-byte character
+    let _ = crab.slice_encoded_bytes(..2); // index 2 lands inside that character
+}
+
+#[cfg(windows)]
+#[test]
+#[should_panic(expected = "byte index 3 is not an OsStr boundary")]
+fn slice_between_surrogates() {
+    use crate::os::windows::ffi::OsStringExt;
+
+    let os_string = OsString::from_wide(&[0xD800, 0xD800]); // the same code unit twice
+    assert_eq!(os_string.as_encoded_bytes(), &[0xED, 0xA0, 0x80, 0xED, 0xA0, 0x80]);
+    let _ = os_string.slice_encoded_bytes(..3); // between the two 3-byte sequences above
+}
+
+#[cfg(windows)]
+#[test]
+fn slice_surrogate_edge() {
+    use crate::os::windows::ffi::OsStringExt;
+
+    let os_string = OsString::from_wide(&[0xD800]);
+    let mut with_crab = os_string.clone();
+    with_crab.push("🦀");
+    // Index 3 is a boundary: a valid UTF-8 character (the crab) starts right there.
+    assert_eq!(with_crab.slice_encoded_bytes(..3), os_string);
+    assert_eq!(with_crab.slice_encoded_bytes(3..), "🦀");
+}
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index 8dc5b07ce10..52b1fe822d6 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -317,6 +317,7 @@
 #![feature(error_iter)]
 #![feature(exact_size_is_empty)]
 #![feature(exclusive_wrapper)]
+#![feature(exposed_provenance)]
 #![feature(extend_one)]
 #![feature(float_gamma)]
 #![feature(float_minimum_maximum)]
@@ -341,6 +342,7 @@
 #![feature(round_ties_even)]
 #![feature(slice_internals)]
 #![feature(slice_ptr_get)]
+#![feature(slice_range)]
 #![feature(std_internals)]
 #![feature(str_internals)]
 #![feature(strict_provenance)]
@@ -583,9 +585,10 @@ pub mod time;
 #[unstable(feature = "portable_simd", issue = "86656")]
 mod std_float;
 
-#[doc = include_str!("../../portable-simd/crates/core_simd/src/core_simd_docs.md")]
 #[unstable(feature = "portable_simd", issue = "86656")]
 pub mod simd {
+    #![doc = include_str!("../../portable-simd/crates/core_simd/src/core_simd_docs.md")]
+
     #[doc(inline)]
     pub use crate::std_float::StdFloat;
     #[doc(inline)]