diff options
Diffstat (limited to 'library/std/src/sys/path/windows.rs')
| -rw-r--r-- | library/std/src/sys/path/windows.rs | 175 |
1 files changed, 3 insertions, 172 deletions
diff --git a/library/std/src/sys/path/windows.rs b/library/std/src/sys/path/windows.rs index e0e003f6a81..f124e1e5a71 100644 --- a/library/std/src/sys/path/windows.rs +++ b/library/std/src/sys/path/windows.rs @@ -1,5 +1,5 @@ use crate::ffi::{OsStr, OsString}; -use crate::path::{Path, PathBuf, Prefix}; +use crate::path::{Path, PathBuf}; use crate::sys::api::utf16; use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s}; use crate::{io, ptr}; @@ -7,6 +7,8 @@ use crate::{io, ptr}; #[cfg(test)] mod tests; +pub use super::windows_prefix::parse_prefix; + pub const MAIN_SEP_STR: &str = "\\"; pub const MAIN_SEP: char = '\\'; @@ -77,177 +79,6 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf { path.into() } -struct PrefixParser<'a, const LEN: usize> { - path: &'a OsStr, - prefix: [u8; LEN], -} - -impl<'a, const LEN: usize> PrefixParser<'a, LEN> { - #[inline] - fn get_prefix(path: &OsStr) -> [u8; LEN] { - let mut prefix = [0; LEN]; - // SAFETY: Only ASCII characters are modified. - for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { - prefix[i] = if ch == b'/' { b'\\' } else { ch }; - } - prefix - } - - fn new(path: &'a OsStr) -> Self { - Self { path, prefix: Self::get_prefix(path) } - } - - fn as_slice(&self) -> PrefixParserSlice<'a, '_> { - PrefixParserSlice { - path: self.path, - prefix: &self.prefix[..LEN.min(self.path.len())], - index: 0, - } - } -} - -struct PrefixParserSlice<'a, 'b> { - path: &'a OsStr, - prefix: &'b [u8], - index: usize, -} - -impl<'a> PrefixParserSlice<'a, '_> { - fn strip_prefix(&self, prefix: &str) -> Option<Self> { - self.prefix[self.index..] - .starts_with(prefix.as_bytes()) - .then_some(Self { index: self.index + prefix.len(), ..*self }) - } - - fn prefix_bytes(&self) -> &'a [u8] { - &self.path.as_encoded_bytes()[..self.index] - } - - fn finish(self) -> &'a OsStr { - // SAFETY: The unsafety here stems from converting between &OsStr and - // &[u8] and back. This is safe to do because (1) we only look at ASCII - // contents of the encoding and (2) new &OsStr values are produced only - // from ASCII-bounded slices of existing &OsStr values. - unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) } - } -} - -pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> { - use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC}; - - let parser = PrefixParser::<8>::new(path); - let parser = parser.as_slice(); - if let Some(parser) = parser.strip_prefix(r"\\") { - // \\ - - // The meaning of verbatim paths can change when they use a different - // separator. - if let Some(parser) = parser.strip_prefix(r"?\") - && !parser.prefix_bytes().iter().any(|&x| x == b'/') - { - // \\?\ - if let Some(parser) = parser.strip_prefix(r"UNC\") { - // \\?\UNC\server\share - - let path = parser.finish(); - let (server, path) = parse_next_component(path, true); - let (share, _) = parse_next_component(path, true); - - Some(VerbatimUNC(server, share)) - } else { - let path = parser.finish(); - - // in verbatim paths only recognize an exact drive prefix - if let Some(drive) = parse_drive_exact(path) { - // \\?\C: - Some(VerbatimDisk(drive)) - } else { - // \\?\prefix - let (prefix, _) = parse_next_component(path, true); - Some(Verbatim(prefix)) - } - } - } else if let Some(parser) = parser.strip_prefix(r".\") { - // \\.\COM42 - let path = parser.finish(); - let (prefix, _) = parse_next_component(path, false); - Some(DeviceNS(prefix)) - } else { - let path = parser.finish(); - let (server, path) = parse_next_component(path, false); - let (share, _) = parse_next_component(path, false); - - if !server.is_empty() && !share.is_empty() { - // \\server\share - Some(UNC(server, share)) - } else { - // no valid prefix beginning with "\\" recognized - None - } - } - } else { - // If it has a drive like `C:` then it's a disk. - // Otherwise there is no prefix. - parse_drive(path).map(Disk) - } -} - -// Parses a drive prefix, e.g. "C:" and "C:\whatever" -fn parse_drive(path: &OsStr) -> Option<u8> { - // In most DOS systems, it is not possible to have more than 26 drive letters. - // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>. - fn is_valid_drive_letter(drive: &u8) -> bool { - drive.is_ascii_alphabetic() - } - - match path.as_encoded_bytes() { - [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), - _ => None, - } -} - -// Parses a drive prefix exactly, e.g. "C:" -fn parse_drive_exact(path: &OsStr) -> Option<u8> { - // only parse two bytes: the drive letter and the drive separator - if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { - parse_drive(path) - } else { - None - } -} - -// Parse the next path component. -// -// Returns the next component and the rest of the path excluding the component and separator. -// Does not recognize `/` as a separator character if `verbatim` is true. -fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { - let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; - - match path.as_encoded_bytes().iter().position(|&x| separator(x)) { - Some(separator_start) => { - let separator_end = separator_start + 1; - - let component = &path.as_encoded_bytes()[..separator_start]; - - // Panic safe - // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. - let path = &path.as_encoded_bytes()[separator_end..]; - - // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') - // is encoded in a single byte, therefore `bytes[separator_start]` and - // `bytes[separator_end]` must be code point boundaries and thus - // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. - unsafe { - ( - OsStr::from_encoded_bytes_unchecked(component), - OsStr::from_encoded_bytes_unchecked(path), - ) - } - } - None => (path, OsStr::new("")), - } -} - /// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits. /// /// This path may or may not have a verbatim prefix. |
