diff options
Diffstat (limited to 'library/std/src/sys/os_str/bytes.rs')
| -rw-r--r-- | library/std/src/sys/os_str/bytes.rs | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs index 3a75ce9ebb7..4ca3f1cd185 100644 --- a/library/std/src/sys/os_str/bytes.rs +++ b/library/std/src/sys/os_str/bytes.rs @@ -211,6 +211,49 @@ impl Slice { unsafe { mem::transmute(s) } } + #[track_caller] + #[inline] + pub fn check_public_boundary(&self, index: usize) { + if index == 0 || index == self.inner.len() { + return; + } + if index < self.inner.len() + && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii()) + { + return; + } + + slow_path(&self.inner, index); + + /// We're betting that typical splits will involve an ASCII character. + /// + /// Putting the expensive checks in a separate function generates notably + /// better assembly. + #[track_caller] + #[inline(never)] + fn slow_path(bytes: &[u8], index: usize) { + let (before, after) = bytes.split_at(index); + + // UTF-8 takes at most 4 bytes per codepoint, so we don't + // need to check more than that. + let after = after.get(..4).unwrap_or(after); + match str::from_utf8(after) { + Ok(_) => return, + Err(err) if err.valid_up_to() != 0 => return, + Err(_) => (), + } + + for len in 2..=4.min(index) { + let before = &before[index - len..]; + if str::from_utf8(before).is_ok() { + return; + } + } + + panic!("byte index {index} is not an OsStr boundary"); + } + } + #[inline] pub fn from_str(s: &str) -> &Slice { unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) } |
