//! The Windows command line is just a string //! //! //! This module implements the parsing necessary to turn that string into a list of arguments. #[cfg(test)] mod tests; use crate::ffi::OsString; use crate::fmt; use crate::marker::PhantomData; use crate::num::NonZeroU16; use crate::os::windows::prelude::*; use crate::path::PathBuf; use crate::ptr::NonNull; use crate::sys::c; use crate::sys::windows::os::current_exe; use crate::vec; use core::iter; pub fn args() -> Args { // SAFETY: `GetCommandLineW` returns a pointer to a null terminated UTF-16 // string so it's safe for `WStrUnits` to use. unsafe { let lp_cmd_line = c::GetCommandLineW(); let parsed_args_list = parse_lp_cmd_line(WStrUnits::new(lp_cmd_line), || { current_exe().map(PathBuf::into_os_string).unwrap_or_else(|_| OsString::new()) }); Args { parsed_args_list: parsed_args_list.into_iter() } } } /// Implements the Windows command-line argument parsing algorithm. /// /// Microsoft's documentation for the Windows CLI argument format can be found at /// /// /// A more in-depth explanation is here: /// /// /// Windows includes a function to do command line parsing in shell32.dll. /// However, this is not used for two reasons: /// /// 1. Linking with that DLL causes the process to be registered as a GUI application. /// GUI applications add a bunch of overhead, even if no windows are drawn. See /// . /// /// 2. It does not follow the modern C/C++ argv rules outlined in the first two links above. /// /// This function was tested for equivalence to the C/C++ parsing rules using an /// extensive test suite available at /// . fn parse_lp_cmd_line<'a, F: Fn() -> OsString>( lp_cmd_line: Option>, exe_name: F, ) -> Vec { const BACKSLASH: NonZeroU16 = NonZeroU16::new(b'\\' as u16).unwrap(); const QUOTE: NonZeroU16 = NonZeroU16::new(b'"' as u16).unwrap(); const TAB: NonZeroU16 = NonZeroU16::new(b'\t' as u16).unwrap(); const SPACE: NonZeroU16 = NonZeroU16::new(b' ' as u16).unwrap(); let mut ret_val = Vec::new(); // If the cmd line pointer is null or it points to an empty string then // return the name of the executable as argv[0]. if lp_cmd_line.as_ref().and_then(|cmd| cmd.peek()).is_none() { ret_val.push(exe_name()); return ret_val; } let mut code_units = lp_cmd_line.unwrap(); // The executable name at the beginning is special. let mut in_quotes = false; let mut cur = Vec::new(); for w in &mut code_units { match w { // A quote mark always toggles `in_quotes` no matter what because // there are no escape characters when parsing the executable name. QUOTE => in_quotes = !in_quotes, // If not `in_quotes` then whitespace ends argv[0]. SPACE | TAB if !in_quotes => break, // In all other cases the code unit is taken literally. _ => cur.push(w.get()), } } // Skip whitespace. code_units.advance_while(|w| w == SPACE || w == TAB); ret_val.push(OsString::from_wide(&cur)); // Parse the arguments according to these rules: // * All code units are taken literally except space, tab, quote and backslash. // * When not `in_quotes`, space and tab separate arguments. Consecutive spaces and tabs are // treated as a single separator. // * A space or tab `in_quotes` is taken literally. // * A quote toggles `in_quotes` mode unless it's escaped. An escaped quote is taken literally. // * A quote can be escaped if preceded by an odd number of backslashes. // * If any number of backslashes is immediately followed by a quote then the number of // backslashes is halved (rounding down). // * Backslashes not followed by a quote are all taken literally. // * If `in_quotes` then a quote can also be escaped using another quote // (i.e. two consecutive quotes become one literal quote). let mut cur = Vec::new(); let mut in_quotes = false; while let Some(w) = code_units.next() { match w { // If not `in_quotes`, a space or tab ends the argument. SPACE | TAB if !in_quotes => { ret_val.push(OsString::from_wide(&cur[..])); cur.truncate(0); // Skip whitespace. code_units.advance_while(|w| w == SPACE || w == TAB); } // Backslashes can escape quotes or backslashes but only if consecutive backslashes are followed by a quote. BACKSLASH => { let backslash_count = code_units.advance_while(|w| w == BACKSLASH) + 1; if code_units.peek() == Some(QUOTE) { cur.extend(iter::repeat(BACKSLASH.get()).take(backslash_count / 2)); // The quote is escaped if there are an odd number of backslashes. if backslash_count % 2 == 1 { code_units.next(); cur.push(QUOTE.get()); } } else { // If there is no quote on the end then there is no escaping. cur.extend(iter::repeat(BACKSLASH.get()).take(backslash_count)); } } // If `in_quotes` and not backslash escaped (see above) then a quote either // unsets `in_quote` or is escaped by another quote. QUOTE if in_quotes => match code_units.peek() { // Two consecutive quotes when `in_quotes` produces one literal quote. Some(QUOTE) => { cur.push(QUOTE.get()); code_units.next(); } // Otherwise set `in_quotes`. Some(_) => in_quotes = false, // The end of the command line. // Push `cur` even if empty, which we do by breaking while `in_quotes` is still set. None => break, }, // If not `in_quotes` and not BACKSLASH escaped (see above) then a quote sets `in_quote`. QUOTE => in_quotes = true, // Everything else is always taken literally. _ => cur.push(w.get()), } } // Push the final argument, if any. if !cur.is_empty() || in_quotes { ret_val.push(OsString::from_wide(&cur[..])); } ret_val } pub struct Args { parsed_args_list: vec::IntoIter, } impl fmt::Debug for Args { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.parsed_args_list.as_slice().fmt(f) } } impl Iterator for Args { type Item = OsString; fn next(&mut self) -> Option { self.parsed_args_list.next() } fn size_hint(&self) -> (usize, Option) { self.parsed_args_list.size_hint() } } impl DoubleEndedIterator for Args { fn next_back(&mut self) -> Option { self.parsed_args_list.next_back() } } impl ExactSizeIterator for Args { fn len(&self) -> usize { self.parsed_args_list.len() } } /// A safe iterator over a LPWSTR /// (aka a pointer to a series of UTF-16 code units terminated by a NULL). struct WStrUnits<'a> { // The pointer must never be null... lpwstr: NonNull, // ...and the memory it points to must be valid for this lifetime. lifetime: PhantomData<&'a [u16]>, } impl WStrUnits<'_> { /// Create the iterator. Returns `None` if `lpwstr` is null. /// /// SAFETY: `lpwstr` must point to a null-terminated wide string that lives /// at least as long as the lifetime of this struct. unsafe fn new(lpwstr: *const u16) -> Option { Some(Self { lpwstr: NonNull::new(lpwstr as _)?, lifetime: PhantomData }) } fn peek(&self) -> Option { // SAFETY: It's always safe to read the current item because we don't // ever move out of the array's bounds. unsafe { NonZeroU16::new(*self.lpwstr.as_ptr()) } } /// Advance the iterator while `predicate` returns true. /// Returns the number of items it advanced by. fn advance_while bool>(&mut self, mut predicate: P) -> usize { let mut counter = 0; while let Some(w) = self.peek() { if !predicate(w) { break; } counter += 1; self.next(); } counter } } impl Iterator for WStrUnits<'_> { // This can never return zero as that marks the end of the string. type Item = NonZeroU16; fn next(&mut self) -> Option { // SAFETY: If NULL is reached we immediately return. // Therefore it's safe to advance the pointer after that. unsafe { let next = self.peek()?; self.lpwstr = NonNull::new_unchecked(self.lpwstr.as_ptr().add(1)); Some(next) } } }