about summary refs log tree commit diff
path: root/library/std/src/os/windows/ffi.rs
diff options
context:
space:
mode:
authorChristiaan Dirkx <christiaan@dirkx.email>2021-03-04 15:15:26 +0100
committerChristiaan Dirkx <christiaan@dirkx.email>2021-05-03 16:56:20 +0200
commitffc20e48de64f73ea57b6b7c121e2b50de30097f (patch)
tree940d3816f1f7c9605615d5ab47313ef748a1411a /library/std/src/os/windows/ffi.rs
parent7024bfffbdd1fb3dafa3f263f6d10fd2aed2ff2d (diff)
downloadrust-ffc20e48de64f73ea57b6b7c121e2b50de30097f.tar.gz
rust-ffc20e48de64f73ea57b6b7c121e2b50de30097f.zip
Move `std::sys::windows::ext` to `std::os::windows`
Diffstat (limited to 'library/std/src/os/windows/ffi.rs')
-rw-r--r--library/std/src/os/windows/ffi.rs134
1 files changed, 134 insertions, 0 deletions
diff --git a/library/std/src/os/windows/ffi.rs b/library/std/src/os/windows/ffi.rs
new file mode 100644
index 00000000000..c89b9ff1efa
--- /dev/null
+++ b/library/std/src/os/windows/ffi.rs
@@ -0,0 +1,134 @@
+//! Windows-specific extensions to the primitives in the `std::ffi` module.
+//!
+//! # Overview
+//!
+//! For historical reasons, the Windows API uses a form of potentially
+//! ill-formed UTF-16 encoding for strings. Specifically, the 16-bit
+//! code units in Windows strings may contain [isolated surrogate code
+//! points which are not paired together][ill-formed-utf-16]. The
+//! Unicode standard requires that surrogate code points (those in the
+//! range U+D800 to U+DFFF) always be *paired*, because in the UTF-16
+//! encoding a *surrogate code unit pair* is used to encode a single
+//! character. For compatibility with code that does not enforce
+//! these pairings, Windows does not enforce them, either.
+//!
+//! While it is not always possible to convert such a string losslessly into
+//! a valid UTF-16 string (or even UTF-8), it is often desirable to be
+//! able to round-trip such a string from and to Windows APIs
+//! losslessly. For example, some Rust code may be "bridging" some
+//! Windows APIs together, just passing `WCHAR` strings among those
+//! APIs without ever really looking into the strings.
+//!
+//! If Rust code *does* need to look into those strings, it can
+//! convert them to valid UTF-8, possibly lossily, by substituting
+//! invalid sequences with [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], as is
+//! conventionally done in other Rust APIs that deal with string
+//! encodings.
+//!
+//! # `OsStringExt` and `OsStrExt`
+//!
+//! [`OsString`] is the Rust wrapper for owned strings in the
+//! preferred representation of the operating system. On Windows,
+//! this struct gets augmented with an implementation of the
+//! [`OsStringExt`] trait, which has a [`OsStringExt::from_wide`] method. This
+//! lets you create an [`OsString`] from a `&[u16]` slice; presumably
+//! you get such a slice out of a `WCHAR` Windows API.
+//!
+//! Similarly, [`OsStr`] is the Rust wrapper for borrowed strings from
+//! preferred representation of the operating system. On Windows, the
+//! [`OsStrExt`] trait provides the [`OsStrExt::encode_wide`] method, which
+//! outputs an [`EncodeWide`] iterator. You can [`collect`] this
+//! iterator, for example, to obtain a `Vec<u16>`; you can later get a
+//! pointer to this vector's contents and feed it to Windows APIs.
+//!
+//! These traits, along with [`OsString`] and [`OsStr`], work in
+//! conjunction so that it is possible to **round-trip** strings from
+//! Windows and back, with no loss of data, even if the strings are
+//! ill-formed UTF-16.
+//!
+//! [ill-formed-utf-16]: https://simonsapin.github.io/wtf-8/#ill-formed-utf-16
+//! [`collect`]: crate::iter::Iterator::collect
+//! [U+FFFD]: crate::char::REPLACEMENT_CHARACTER
+
+#![stable(feature = "rust1", since = "1.0.0")]
+
+use crate::ffi::{OsStr, OsString};
+use crate::sealed::Sealed;
+use crate::sys::os_str::Buf;
+use crate::sys_common::wtf8::Wtf8Buf;
+use crate::sys_common::{AsInner, FromInner};
+
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use crate::sys_common::wtf8::EncodeWide;
+
+/// Windows-specific extensions to [`OsString`].
+///
+/// This trait is sealed: it cannot be implemented outside the standard library.
+/// This is so that future additional methods are not breaking changes.
+#[stable(feature = "rust1", since = "1.0.0")]
+pub trait OsStringExt: Sealed {
+    /// Creates an `OsString` from a potentially ill-formed UTF-16 slice of
+    /// 16-bit code units.
+    ///
+    /// This is lossless: calling [`OsStrExt::encode_wide`] on the resulting string
+    /// will always return the original code units.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::OsString;
+    /// use std::os::windows::prelude::*;
+    ///
+    /// // UTF-16 encoding for "Unicode".
+    /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065];
+    ///
+    /// let string = OsString::from_wide(&source[..]);
+    /// ```
+    #[stable(feature = "rust1", since = "1.0.0")]
+    fn from_wide(wide: &[u16]) -> Self;
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl OsStringExt for OsString {
+    fn from_wide(wide: &[u16]) -> OsString {
+        FromInner::from_inner(Buf { inner: Wtf8Buf::from_wide(wide) })
+    }
+}
+
+/// Windows-specific extensions to [`OsStr`].
+///
+/// This trait is sealed: it cannot be implemented outside the standard library.
+/// This is so that future additional methods are not breaking changes.
+#[stable(feature = "rust1", since = "1.0.0")]
+pub trait OsStrExt: Sealed {
+    /// Re-encodes an `OsStr` as a wide character sequence, i.e., potentially
+    /// ill-formed UTF-16.
+    ///
+    /// This is lossless: calling [`OsStringExt::from_wide`] and then
+    /// `encode_wide` on the result will yield the original code units.
+    /// Note that the encoding does not add a final null terminator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::OsString;
+    /// use std::os::windows::prelude::*;
+    ///
+    /// // UTF-16 encoding for "Unicode".
+    /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065];
+    ///
+    /// let string = OsString::from_wide(&source[..]);
+    ///
+    /// let result: Vec<u16> = string.encode_wide().collect();
+    /// assert_eq!(&source[..], &result[..]);
+    /// ```
+    #[stable(feature = "rust1", since = "1.0.0")]
+    fn encode_wide(&self) -> EncodeWide<'_>;
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl OsStrExt for OsStr {
+    fn encode_wide(&self) -> EncodeWide<'_> {
+        self.as_inner().inner.encode_wide()
+    }
+}