diff options
| author | 王宇逸 <Strawberry_Str@hotmail.com> | 2025-06-01 23:12:35 +0800 |
|---|---|---|
| committer | 王宇逸 <Strawberry_Str@hotmail.com> | 2025-06-16 09:24:07 +0800 |
| commit | 3cb0cba054d9d1871f3a10345d5c30cfc7ac214c (patch) | |
| tree | 8e8c225bf85c5c9afc3afe80cdb7b5ebf49140ba /library/std/src/sys | |
| parent | 015c7770ec0ffdba9ff03f1861144a827497f8ca (diff) | |
| download | rust-3cb0cba054d9d1871f3a10345d5c30cfc7ac214c.tar.gz rust-3cb0cba054d9d1871f3a10345d5c30cfc7ac214c.zip | |
Handle win32 separator & prefixes for cygwin paths
Diffstat (limited to 'library/std/src/sys')
| -rw-r--r-- | library/std/src/sys/path/cygwin.rs | 92 | ||||
| -rw-r--r-- | library/std/src/sys/path/mod.rs | 5 | ||||
| -rw-r--r-- | library/std/src/sys/path/windows.rs | 175 | ||||
| -rw-r--r-- | library/std/src/sys/path/windows/tests.rs | 2 | ||||
| -rw-r--r-- | library/std/src/sys/path/windows_prefix.rs | 182 |
5 files changed, 284 insertions, 172 deletions
diff --git a/library/std/src/sys/path/cygwin.rs b/library/std/src/sys/path/cygwin.rs new file mode 100644 index 00000000000..e90372805bb --- /dev/null +++ b/library/std/src/sys/path/cygwin.rs @@ -0,0 +1,92 @@ +use crate::ffi::OsString; +use crate::os::unix::ffi::OsStringExt; +use crate::path::{Path, PathBuf}; +use crate::sys::common::small_c_string::run_path_with_cstr; +use crate::sys::cvt; +use crate::{io, ptr}; + +#[inline] +pub fn is_sep_byte(b: u8) -> bool { + b == b'/' || b == b'\\' +} + +/// Cygwin allways prefers `/` over `\`, and it always converts all `/` to `\` +/// internally when calling Win32 APIs. Therefore, the server component of path +/// `\\?\UNC\localhost/share` is `localhost/share` on Win32, but `localhost` +/// on Cygwin. +#[inline] +pub fn is_verbatim_sep(b: u8) -> bool { + b == b'/' || b == b'\\' +} + +pub use super::windows_prefix::parse_prefix; + +pub const MAIN_SEP_STR: &str = "/"; +pub const MAIN_SEP: char = '/'; + +unsafe extern "C" { + // Doc: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html + // Src: https://github.com/cygwin/cygwin/blob/718a15ba50e0d01c79800bd658c2477f9a603540/winsup/cygwin/path.cc#L3902 + // Safety: + // * `what` should be `CCP_WIN_A_TO_POSIX` here + // * `from` is null-terminated UTF-8 path + // * `to` is buffer, the buffer size is `size`. + // + // Converts a path to an absolute POSIX path, no matter the input is Win32 path or POSIX path. + fn cygwin_conv_path( + what: libc::c_uint, + from: *const libc::c_char, + to: *mut u8, + size: libc::size_t, + ) -> libc::ssize_t; +} + +const CCP_WIN_A_TO_POSIX: libc::c_uint = 2; + +/// Make a POSIX path absolute. +pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> { + run_path_with_cstr(path, &|path| { + let conv = CCP_WIN_A_TO_POSIX; + let size = cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), ptr::null_mut(), 0) })?; + // If success, size should not be 0. + debug_assert!(size >= 1); + let size = size as usize; + let mut buffer = Vec::with_capacity(size); + cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), buffer.as_mut_ptr(), size) })?; + unsafe { + buffer.set_len(size - 1); + } + Ok(PathBuf::from(OsString::from_vec(buffer))) + }) + .map(|path| { + if path.prefix().is_some() { + return path; + } + + // From unix.rs + let mut components = path.components(); + let path_os = path.as_os_str().as_encoded_bytes(); + + let mut normalized = if path_os.starts_with(b"//") && !path_os.starts_with(b"///") { + components.next(); + PathBuf::from("//") + } else { + PathBuf::new() + }; + normalized.extend(components); + + if path_os.ends_with(b"/") { + normalized.push(""); + } + + normalized + }) +} + +pub(crate) fn is_absolute(path: &Path) -> bool { + if path.as_os_str().as_encoded_bytes().starts_with(b"\\") { + path.has_root() && path.prefix().is_some() + } else { + path.has_root() + } +} diff --git a/library/std/src/sys/path/mod.rs b/library/std/src/sys/path/mod.rs index 1fa4e80d678..a4ff4338cf5 100644 --- a/library/std/src/sys/path/mod.rs +++ b/library/std/src/sys/path/mod.rs @@ -1,6 +1,7 @@ cfg_if::cfg_if! { if #[cfg(target_os = "windows")] { mod windows; + mod windows_prefix; pub use windows::*; } else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] { mod sgx; @@ -11,6 +12,10 @@ cfg_if::cfg_if! { } else if #[cfg(target_os = "uefi")] { mod uefi; pub use uefi::*; + } else if #[cfg(target_os = "cygwin")] { + mod cygwin; + mod windows_prefix; + pub use cygwin::*; } else { mod unix; pub use unix::*; diff --git a/library/std/src/sys/path/windows.rs b/library/std/src/sys/path/windows.rs index e0e003f6a81..f124e1e5a71 100644 --- a/library/std/src/sys/path/windows.rs +++ b/library/std/src/sys/path/windows.rs @@ -1,5 +1,5 @@ use crate::ffi::{OsStr, OsString}; -use crate::path::{Path, PathBuf, Prefix}; +use crate::path::{Path, PathBuf}; use crate::sys::api::utf16; use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s}; use crate::{io, ptr}; @@ -7,6 +7,8 @@ use crate::{io, ptr}; #[cfg(test)] mod tests; +pub use super::windows_prefix::parse_prefix; + pub const MAIN_SEP_STR: &str = "\\"; pub const MAIN_SEP: char = '\\'; @@ -77,177 +79,6 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf { path.into() } -struct PrefixParser<'a, const LEN: usize> { - path: &'a OsStr, - prefix: [u8; LEN], -} - -impl<'a, const LEN: usize> PrefixParser<'a, LEN> { - #[inline] - fn get_prefix(path: &OsStr) -> [u8; LEN] { - let mut prefix = [0; LEN]; - // SAFETY: Only ASCII characters are modified. - for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { - prefix[i] = if ch == b'/' { b'\\' } else { ch }; - } - prefix - } - - fn new(path: &'a OsStr) -> Self { - Self { path, prefix: Self::get_prefix(path) } - } - - fn as_slice(&self) -> PrefixParserSlice<'a, '_> { - PrefixParserSlice { - path: self.path, - prefix: &self.prefix[..LEN.min(self.path.len())], - index: 0, - } - } -} - -struct PrefixParserSlice<'a, 'b> { - path: &'a OsStr, - prefix: &'b [u8], - index: usize, -} - -impl<'a> PrefixParserSlice<'a, '_> { - fn strip_prefix(&self, prefix: &str) -> Option<Self> { - self.prefix[self.index..] - .starts_with(prefix.as_bytes()) - .then_some(Self { index: self.index + prefix.len(), ..*self }) - } - - fn prefix_bytes(&self) -> &'a [u8] { - &self.path.as_encoded_bytes()[..self.index] - } - - fn finish(self) -> &'a OsStr { - // SAFETY: The unsafety here stems from converting between &OsStr and - // &[u8] and back. This is safe to do because (1) we only look at ASCII - // contents of the encoding and (2) new &OsStr values are produced only - // from ASCII-bounded slices of existing &OsStr values. - unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) } - } -} - -pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> { - use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC}; - - let parser = PrefixParser::<8>::new(path); - let parser = parser.as_slice(); - if let Some(parser) = parser.strip_prefix(r"\\") { - // \\ - - // The meaning of verbatim paths can change when they use a different - // separator. - if let Some(parser) = parser.strip_prefix(r"?\") - && !parser.prefix_bytes().iter().any(|&x| x == b'/') - { - // \\?\ - if let Some(parser) = parser.strip_prefix(r"UNC\") { - // \\?\UNC\server\share - - let path = parser.finish(); - let (server, path) = parse_next_component(path, true); - let (share, _) = parse_next_component(path, true); - - Some(VerbatimUNC(server, share)) - } else { - let path = parser.finish(); - - // in verbatim paths only recognize an exact drive prefix - if let Some(drive) = parse_drive_exact(path) { - // \\?\C: - Some(VerbatimDisk(drive)) - } else { - // \\?\prefix - let (prefix, _) = parse_next_component(path, true); - Some(Verbatim(prefix)) - } - } - } else if let Some(parser) = parser.strip_prefix(r".\") { - // \\.\COM42 - let path = parser.finish(); - let (prefix, _) = parse_next_component(path, false); - Some(DeviceNS(prefix)) - } else { - let path = parser.finish(); - let (server, path) = parse_next_component(path, false); - let (share, _) = parse_next_component(path, false); - - if !server.is_empty() && !share.is_empty() { - // \\server\share - Some(UNC(server, share)) - } else { - // no valid prefix beginning with "\\" recognized - None - } - } - } else { - // If it has a drive like `C:` then it's a disk. - // Otherwise there is no prefix. - parse_drive(path).map(Disk) - } -} - -// Parses a drive prefix, e.g. "C:" and "C:\whatever" -fn parse_drive(path: &OsStr) -> Option<u8> { - // In most DOS systems, it is not possible to have more than 26 drive letters. - // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>. - fn is_valid_drive_letter(drive: &u8) -> bool { - drive.is_ascii_alphabetic() - } - - match path.as_encoded_bytes() { - [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), - _ => None, - } -} - -// Parses a drive prefix exactly, e.g. "C:" -fn parse_drive_exact(path: &OsStr) -> Option<u8> { - // only parse two bytes: the drive letter and the drive separator - if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { - parse_drive(path) - } else { - None - } -} - -// Parse the next path component. -// -// Returns the next component and the rest of the path excluding the component and separator. -// Does not recognize `/` as a separator character if `verbatim` is true. -fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { - let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; - - match path.as_encoded_bytes().iter().position(|&x| separator(x)) { - Some(separator_start) => { - let separator_end = separator_start + 1; - - let component = &path.as_encoded_bytes()[..separator_start]; - - // Panic safe - // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. - let path = &path.as_encoded_bytes()[separator_end..]; - - // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') - // is encoded in a single byte, therefore `bytes[separator_start]` and - // `bytes[separator_end]` must be code point boundaries and thus - // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. - unsafe { - ( - OsStr::from_encoded_bytes_unchecked(component), - OsStr::from_encoded_bytes_unchecked(path), - ) - } - } - None => (path, OsStr::new("")), - } -} - /// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits. /// /// This path may or may not have a verbatim prefix. diff --git a/library/std/src/sys/path/windows/tests.rs b/library/std/src/sys/path/windows/tests.rs index 9eb79203dca..830f48d7bfc 100644 --- a/library/std/src/sys/path/windows/tests.rs +++ b/library/std/src/sys/path/windows/tests.rs @@ -1,4 +1,6 @@ +use super::super::windows_prefix::*; use super::*; +use crate::path::Prefix; #[test] fn test_parse_next_component() { diff --git a/library/std/src/sys/path/windows_prefix.rs b/library/std/src/sys/path/windows_prefix.rs new file mode 100644 index 00000000000..b9dfe754485 --- /dev/null +++ b/library/std/src/sys/path/windows_prefix.rs @@ -0,0 +1,182 @@ +//! Parse Windows prefixes, for both Windows and Cygwin. + +use super::{is_sep_byte, is_verbatim_sep}; +use crate::ffi::OsStr; +use crate::path::Prefix; + +struct PrefixParser<'a, const LEN: usize> { + path: &'a OsStr, + prefix: [u8; LEN], +} + +impl<'a, const LEN: usize> PrefixParser<'a, LEN> { + #[inline] + fn get_prefix(path: &OsStr) -> [u8; LEN] { + let mut prefix = [0; LEN]; + // SAFETY: Only ASCII characters are modified. + for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { + prefix[i] = if ch == b'/' { b'\\' } else { ch }; + } + prefix + } + + fn new(path: &'a OsStr) -> Self { + Self { path, prefix: Self::get_prefix(path) } + } + + fn as_slice(&self) -> PrefixParserSlice<'a, '_> { + PrefixParserSlice { + path: self.path, + prefix: &self.prefix[..LEN.min(self.path.len())], + index: 0, + } + } +} + +struct PrefixParserSlice<'a, 'b> { + path: &'a OsStr, + prefix: &'b [u8], + index: usize, +} + +impl<'a> PrefixParserSlice<'a, '_> { + fn strip_prefix(&self, prefix: &str) -> Option<Self> { + self.prefix[self.index..] + .starts_with(prefix.as_bytes()) + .then_some(Self { index: self.index + prefix.len(), ..*self }) + } + + fn prefix_bytes(&self) -> &'a [u8] { + &self.path.as_encoded_bytes()[..self.index] + } + + fn finish(self) -> &'a OsStr { + // SAFETY: The unsafety here stems from converting between &OsStr and + // &[u8] and back. This is safe to do because (1) we only look at ASCII + // contents of the encoding and (2) new &OsStr values are produced only + // from ASCII-bounded slices of existing &OsStr values. + unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) } + } +} + +pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> { + use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC}; + + let parser = PrefixParser::<8>::new(path); + let parser = parser.as_slice(); + if let Some(parser) = parser.strip_prefix(r"\\") { + // \\ + + // It's a POSIX path. + if cfg!(target_os = "cygwin") && !path.as_encoded_bytes().iter().any(|&x| x == b'\\') { + return None; + } + + // The meaning of verbatim paths can change when they use a different + // separator. + if let Some(parser) = parser.strip_prefix(r"?\") + // Cygwin allows `/` in verbatim paths. + && (cfg!(target_os = "cygwin") || !parser.prefix_bytes().iter().any(|&x| x == b'/')) + { + // \\?\ + if let Some(parser) = parser.strip_prefix(r"UNC\") { + // \\?\UNC\server\share + + let path = parser.finish(); + let (server, path) = parse_next_component(path, true); + let (share, _) = parse_next_component(path, true); + + Some(VerbatimUNC(server, share)) + } else { + let path = parser.finish(); + + // in verbatim paths only recognize an exact drive prefix + if let Some(drive) = parse_drive_exact(path) { + // \\?\C: + Some(VerbatimDisk(drive)) + } else { + // \\?\prefix + let (prefix, _) = parse_next_component(path, true); + Some(Verbatim(prefix)) + } + } + } else if let Some(parser) = parser.strip_prefix(r".\") { + // \\.\COM42 + let path = parser.finish(); + let (prefix, _) = parse_next_component(path, false); + Some(DeviceNS(prefix)) + } else { + let path = parser.finish(); + let (server, path) = parse_next_component(path, false); + let (share, _) = parse_next_component(path, false); + + if !server.is_empty() && !share.is_empty() { + // \\server\share + Some(UNC(server, share)) + } else { + // no valid prefix beginning with "\\" recognized + None + } + } + } else { + // If it has a drive like `C:` then it's a disk. + // Otherwise there is no prefix. + Some(Disk(parse_drive(path)?)) + } +} + +// Parses a drive prefix, e.g. "C:" and "C:\whatever" +fn parse_drive(path: &OsStr) -> Option<u8> { + // In most DOS systems, it is not possible to have more than 26 drive letters. + // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>. + fn is_valid_drive_letter(drive: &u8) -> bool { + drive.is_ascii_alphabetic() + } + + match path.as_encoded_bytes() { + [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), + _ => None, + } +} + +// Parses a drive prefix exactly, e.g. "C:" +fn parse_drive_exact(path: &OsStr) -> Option<u8> { + // only parse two bytes: the drive letter and the drive separator + if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { + parse_drive(path) + } else { + None + } +} + +// Parse the next path component. +// +// Returns the next component and the rest of the path excluding the component and separator. +// Does not recognize `/` as a separator character on Windows if `verbatim` is true. +pub(crate) fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { + let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; + + match path.as_encoded_bytes().iter().position(|&x| separator(x)) { + Some(separator_start) => { + let separator_end = separator_start + 1; + + let component = &path.as_encoded_bytes()[..separator_start]; + + // Panic safe + // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. + let path = &path.as_encoded_bytes()[separator_end..]; + + // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') + // is encoded in a single byte, therefore `bytes[separator_start]` and + // `bytes[separator_end]` must be code point boundaries and thus + // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. + unsafe { + ( + OsStr::from_encoded_bytes_unchecked(component), + OsStr::from_encoded_bytes_unchecked(path), + ) + } + } + None => (path, OsStr::new("")), + } +} |
