about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-07-04 01:24:05 +0000
committerbors <bors@rust-lang.org>2021-07-04 01:24:05 +0000
commit154071194641d8db6c056b0d83dd0d071f9f6758 (patch)
treecda0c37923b0e157c4b53c1b238cc96c6df4019f
parentd34a3a401b4e44f289a4d5bf53da83367cbb6aa7 (diff)
parenta200c01e4fb32179af1cbbc47a1e5ea7a6394e9a (diff)
downloadrust-154071194641d8db6c056b0d83dd0d071f9f6758.tar.gz
rust-154071194641d8db6c056b0d83dd0d071f9f6758.zip
Auto merge of #85270 - ChrisDenton:win-env-case, r=m-ou-se
When using `process::Command` on Windows, environment variable names must be case-preserving but case-insensitive

When using `Command` to set the environment variables, the key should be compared as uppercase Unicode but when set it should preserve the original case.

Fixes #85242
-rw-r--r--library/std/src/sys/windows/c.rs12
-rw-r--r--library/std/src/sys/windows/process.rs83
-rw-r--r--library/std/src/sys/windows/process/tests.rs61
3 files changed, 147 insertions, 9 deletions
diff --git a/library/std/src/sys/windows/c.rs b/library/std/src/sys/windows/c.rs
index 3b2ded301c2..63f9be7b7e3 100644
--- a/library/std/src/sys/windows/c.rs
+++ b/library/std/src/sys/windows/c.rs
@@ -74,6 +74,10 @@ pub type ADDRESS_FAMILY = USHORT;
 pub const TRUE: BOOL = 1;
 pub const FALSE: BOOL = 0;
 
+pub const CSTR_LESS_THAN: c_int = 1;
+pub const CSTR_EQUAL: c_int = 2;
+pub const CSTR_GREATER_THAN: c_int = 3;
+
 pub const FILE_ATTRIBUTE_READONLY: DWORD = 0x1;
 pub const FILE_ATTRIBUTE_DIRECTORY: DWORD = 0x10;
 pub const FILE_ATTRIBUTE_REPARSE_POINT: DWORD = 0x400;
@@ -970,6 +974,14 @@ extern "system" {
     pub fn ReleaseSRWLockShared(SRWLock: PSRWLOCK);
     pub fn TryAcquireSRWLockExclusive(SRWLock: PSRWLOCK) -> BOOLEAN;
     pub fn TryAcquireSRWLockShared(SRWLock: PSRWLOCK) -> BOOLEAN;
+
+    pub fn CompareStringOrdinal(
+        lpString1: LPCWSTR,
+        cchCount1: c_int,
+        lpString2: LPCWSTR,
+        cchCount2: c_int,
+        bIgnoreCase: BOOL,
+    ) -> c_int;
 }
 
 #[link(name = "ws2_32")]
diff --git a/library/std/src/sys/windows/process.rs b/library/std/src/sys/windows/process.rs
index b3caf232064..b082e21ab3b 100644
--- a/library/std/src/sys/windows/process.rs
+++ b/library/std/src/sys/windows/process.rs
@@ -4,6 +4,7 @@
 mod tests;
 
 use crate::borrow::Borrow;
+use crate::cmp;
 use crate::collections::BTreeMap;
 use crate::convert::{TryFrom, TryInto};
 use crate::env;
@@ -34,32 +35,95 @@ use libc::{c_void, EXIT_FAILURE, EXIT_SUCCESS};
 // Command
 ////////////////////////////////////////////////////////////////////////////////
 
-#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+#[derive(Clone, Debug, Eq)]
 #[doc(hidden)]
-pub struct EnvKey(OsString);
+pub struct EnvKey {
+    os_string: OsString,
+    // This stores a UTF-16 encoded string to workaround the mismatch between
+    // Rust's OsString (WTF-8) and the Windows API string type (UTF-16).
+    // Normally converting on every API call is acceptable but here
+    // `c::CompareStringOrdinal` will be called for every use of `==`.
+    utf16: Vec<u16>,
+}
+
+// Comparing Windows environment variable keys[1] are behaviourally the
+// composition of two operations[2]:
+//
+// 1. Case-fold both strings. This is done using a language-independent
+// uppercase mapping that's unique to Windows (albeit based on data from an
+// older Unicode spec). It only operates on individual UTF-16 code units so
+// surrogates are left unchanged. This uppercase mapping can potentially change
+// between Windows versions.
+//
+// 2. Perform an ordinal comparison of the strings. A comparison using ordinal
+// is just a comparison based on the numerical value of each UTF-16 code unit[3].
+//
+// Because the case-folding mapping is unique to Windows and not guaranteed to
+// be stable, we ask the OS to compare the strings for us. This is done by
+// calling `CompareStringOrdinal`[4] with `bIgnoreCase` set to `TRUE`.
+//
+// [1] https://docs.microsoft.com/en-us/dotnet/standard/base-types/best-practices-strings#choosing-a-stringcomparison-member-for-your-method-call
+// [2] https://docs.microsoft.com/en-us/dotnet/standard/base-types/best-practices-strings#stringtoupper-and-stringtolower
+// [3] https://docs.microsoft.com/en-us/dotnet/api/system.stringcomparison?view=net-5.0#System_StringComparison_Ordinal
+// [4] https://docs.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-comparestringordinal
+impl Ord for EnvKey {
+    fn cmp(&self, other: &Self) -> cmp::Ordering {
+        unsafe {
+            let result = c::CompareStringOrdinal(
+                self.utf16.as_ptr(),
+                self.utf16.len() as _,
+                other.utf16.as_ptr(),
+                other.utf16.len() as _,
+                c::TRUE,
+            );
+            match result {
+                c::CSTR_LESS_THAN => cmp::Ordering::Less,
+                c::CSTR_EQUAL => cmp::Ordering::Equal,
+                c::CSTR_GREATER_THAN => cmp::Ordering::Greater,
+                // `CompareStringOrdinal` should never fail so long as the parameters are correct.
+                _ => panic!("comparing environment keys failed: {}", Error::last_os_error()),
+            }
+        }
+    }
+}
+impl PartialOrd for EnvKey {
+    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+impl PartialEq for EnvKey {
+    fn eq(&self, other: &Self) -> bool {
+        if self.utf16.len() != other.utf16.len() {
+            false
+        } else {
+            self.cmp(other) == cmp::Ordering::Equal
+        }
+    }
+}
 
+// Environment variable keys should preserve their original case even though
+// they are compared using a caseless string mapping.
 impl From<OsString> for EnvKey {
-    fn from(mut k: OsString) -> Self {
-        k.make_ascii_uppercase();
-        EnvKey(k)
+    fn from(k: OsString) -> Self {
+        EnvKey { utf16: k.encode_wide().collect(), os_string: k }
     }
 }
 
 impl From<EnvKey> for OsString {
     fn from(k: EnvKey) -> Self {
-        k.0
+        k.os_string
     }
 }
 
 impl Borrow<OsStr> for EnvKey {
     fn borrow(&self) -> &OsStr {
-        &self.0
+        &self.os_string
     }
 }
 
 impl AsRef<OsStr> for EnvKey {
     fn as_ref(&self) -> &OsStr {
-        &self.0
+        &self.os_string
     }
 }
 
@@ -537,7 +601,8 @@ fn make_envp(maybe_env: Option<BTreeMap<EnvKey, OsString>>) -> io::Result<(*mut
         }
 
         for (k, v) in env {
-            blk.extend(ensure_no_nuls(k.0)?.encode_wide());
+            ensure_no_nuls(k.os_string)?;
+            blk.extend(k.utf16);
             blk.push('=' as u16);
             blk.extend(ensure_no_nuls(v)?.encode_wide());
             blk.push(0);
diff --git a/library/std/src/sys/windows/process/tests.rs b/library/std/src/sys/windows/process/tests.rs
index 8830ae049c6..ff3f9131cc8 100644
--- a/library/std/src/sys/windows/process/tests.rs
+++ b/library/std/src/sys/windows/process/tests.rs
@@ -1,5 +1,7 @@
 use super::make_command_line;
+use crate::env;
 use crate::ffi::{OsStr, OsString};
+use crate::process::Command;
 
 #[test]
 fn test_make_command_line() {
@@ -41,3 +43,62 @@ fn test_make_command_line() {
         "\"\u{03c0}\u{042f}\u{97f3}\u{00e6}\u{221e}\""
     );
 }
+
+// On Windows, environment args are case preserving but comparisons are case-insensitive.
+// See: #85242
+#[test]
+fn windows_env_unicode_case() {
+    let test_cases = [
+        ("ä", "Ä"),
+        ("ß", "SS"),
+        ("Ä", "Ö"),
+        ("Ä", "Ö"),
+        ("I", "İ"),
+        ("I", "i"),
+        ("I", "ı"),
+        ("i", "I"),
+        ("i", "İ"),
+        ("i", "ı"),
+        ("İ", "I"),
+        ("İ", "i"),
+        ("İ", "ı"),
+        ("ı", "I"),
+        ("ı", "i"),
+        ("ı", "İ"),
+        ("ä", "Ä"),
+        ("ß", "SS"),
+        ("Ä", "Ö"),
+        ("Ä", "Ö"),
+        ("I", "İ"),
+        ("I", "i"),
+        ("I", "ı"),
+        ("i", "I"),
+        ("i", "İ"),
+        ("i", "ı"),
+        ("İ", "I"),
+        ("İ", "i"),
+        ("İ", "ı"),
+        ("ı", "I"),
+        ("ı", "i"),
+        ("ı", "İ"),
+    ];
+    // Test that `cmd.env` matches `env::set_var` when setting two strings that
+    // may (or may not) be case-folded when compared.
+    for (a, b) in test_cases.iter() {
+        let mut cmd = Command::new("cmd");
+        cmd.env(a, "1");
+        cmd.env(b, "2");
+        env::set_var(a, "1");
+        env::set_var(b, "2");
+
+        for (key, value) in cmd.get_envs() {
+            assert_eq!(
+                env::var(key).ok(),
+                value.map(|s| s.to_string_lossy().into_owned()),
+                "command environment mismatch: {} {}",
+                a,
+                b
+            );
+        }
+    }
+}