about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--library/alloc/src/string.rs16
-rw-r--r--library/alloc/tests/str.rs22
-rw-r--r--library/core/src/ffi/c_str.rs2
-rw-r--r--library/core/src/ffi/mod.rs5
-rw-r--r--library/core/src/str/iter.rs21
-rw-r--r--library/std/src/ffi/os_str.rs65
-rw-r--r--library/std/src/os/raw/mod.rs5
-rw-r--r--library/std/src/os/unix/fs.rs31
-rw-r--r--library/std/src/sys/unix/os_str.rs10
-rw-r--r--library/std/src/sys/windows/os_str.rs10
-rw-r--r--library/std/src/sys_common/wtf8.rs15
11 files changed, 177 insertions, 25 deletions
diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index ad7b77f5497..0019f51b3d7 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -1290,11 +1290,11 @@ impl String {
     /// Basic usage:
     ///
     /// ```
-    /// let mut s = String::from("foo");
+    /// let mut s = String::from("abč");
     ///
-    /// assert_eq!(s.pop(), Some('o'));
-    /// assert_eq!(s.pop(), Some('o'));
-    /// assert_eq!(s.pop(), Some('f'));
+    /// assert_eq!(s.pop(), Some('č'));
+    /// assert_eq!(s.pop(), Some('b'));
+    /// assert_eq!(s.pop(), Some('a'));
     ///
     /// assert_eq!(s.pop(), None);
     /// ```
@@ -1324,11 +1324,11 @@ impl String {
     /// Basic usage:
     ///
     /// ```
-    /// let mut s = String::from("foo");
+    /// let mut s = String::from("abç");
     ///
-    /// assert_eq!(s.remove(0), 'f');
-    /// assert_eq!(s.remove(1), 'o');
-    /// assert_eq!(s.remove(0), 'o');
+    /// assert_eq!(s.remove(0), 'a');
+    /// assert_eq!(s.remove(1), 'ç');
+    /// assert_eq!(s.remove(0), 'b');
     /// ```
     #[inline]
     #[stable(feature = "rust1", since = "1.0.0")]
diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs
index 82c1a9f9ad7..8a4b4ac4e8d 100644
--- a/library/alloc/tests/str.rs
+++ b/library/alloc/tests/str.rs
@@ -1739,6 +1739,28 @@ fn test_utf16_code_units() {
 }
 
 #[test]
+fn test_utf16_size_hint() {
+    assert_eq!("".encode_utf16().size_hint(), (0, Some(0)));
+    assert_eq!("123".encode_utf16().size_hint(), (1, Some(3)));
+    assert_eq!("1234".encode_utf16().size_hint(), (2, Some(4)));
+    assert_eq!("12345678".encode_utf16().size_hint(), (3, Some(8)));
+
+    fn hint_vec(src: &str) -> Vec<(usize, Option<usize>)> {
+        let mut it = src.encode_utf16();
+        let mut result = Vec::new();
+        result.push(it.size_hint());
+        while it.next().is_some() {
+            result.push(it.size_hint())
+        }
+        result
+    }
+
+    assert_eq!(hint_vec("12"), [(1, Some(2)), (1, Some(1)), (0, Some(0))]);
+    assert_eq!(hint_vec("\u{101234}"), [(2, Some(4)), (1, Some(1)), (0, Some(0))]);
+    assert_eq!(hint_vec("\u{101234}a"), [(2, Some(5)), (2, Some(2)), (1, Some(1)), (0, Some(0))]);
+}
+
+#[test]
 fn starts_with_in_unicode() {
     assert!(!"├── Cargo.toml".starts_with("# "));
 }
diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs
index 39f795c1f13..549fec9477a 100644
--- a/library/core/src/ffi/c_str.rs
+++ b/library/core/src/ffi/c_str.rs
@@ -256,7 +256,7 @@ impl CStr {
     #[inline]
     #[must_use]
     #[stable(feature = "rust1", since = "1.0.0")]
-    #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "101719")]
+    #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")]
     pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr {
         // SAFETY: The caller has provided a pointer that points to a valid C
         // string with a NUL terminator of size less than `isize::MAX`, whose
diff --git a/library/core/src/ffi/mod.rs b/library/core/src/ffi/mod.rs
index 0488c807604..3fcd5e7c1cb 100644
--- a/library/core/src/ffi/mod.rs
+++ b/library/core/src/ffi/mod.rs
@@ -52,11 +52,6 @@ macro_rules! type_alias {
 }
 
 type_alias! { "c_char.md", c_char = c_char_definition::c_char, NonZero_c_char = c_char_definition::NonZero_c_char;
-// Make this type alias appear cfg-dependent so that Clippy does not suggest
-// replacing `0 as c_char` with `0_i8`/`0_u8`. This #[cfg(all())] can be removed
-// after the false positive in https://github.com/rust-lang/rust-clippy/issues/8093
-// is fixed.
-#[cfg(all())]
 #[doc(cfg(all()))] }
 
 type_alias! { "c_schar.md", c_schar = i8, NonZero_c_schar = NonZeroI8; }
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
index 772c3605562..cd16810c4dd 100644
--- a/library/core/src/str/iter.rs
+++ b/library/core/src/str/iter.rs
@@ -1439,11 +1439,22 @@ impl<'a> Iterator for EncodeUtf16<'a> {
 
     #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
-        let (low, high) = self.chars.size_hint();
-        // every char gets either one u16 or two u16,
-        // so this iterator is between 1 or 2 times as
-        // long as the underlying iterator.
-        (low, high.and_then(|n| n.checked_mul(2)))
+        let len = self.chars.iter.len();
+        // The highest bytes:code units ratio occurs for 3-byte sequences,
+        // since a 4-byte sequence results in 2 code units. The lower bound
+        // is therefore determined by assuming the remaining bytes contain as
+        // many 3-byte sequences as possible. The highest bytes:code units
+        // ratio is for 1-byte sequences, so use this for the upper bound.
+        // `(len + 2)` can't overflow, because we know that the `slice::Iter`
+        // belongs to a slice in memory which has a maximum length of
+        // `isize::MAX` (that's well below `usize::MAX`)
+        if self.extra == 0 {
+            ((len + 2) / 3, Some(len))
+        } else {
+            // We're in the middle of a surrogate pair, so add the remaining
+            // surrogate to the bounds.
+            ((len + 2) / 3 + 1, Some(len + 1))
+        }
     }
 }
 
diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
index e7bad9d542c..67e58fd1b86 100644
--- a/library/std/src/ffi/os_str.rs
+++ b/library/std/src/ffi/os_str.rs
@@ -141,6 +141,51 @@ impl OsString {
         OsString { inner: Buf::from_string(String::new()) }
     }
 
+    /// Converts bytes to an `OsString` without checking that the bytes contains
+    /// valid [`OsStr`]-encoded data.
+    ///
+    /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
+    /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
+    /// ASCII.
+    ///
+    /// See the [module's toplevel documentation about conversions][conversions] for safe,
+    /// cross-platform [conversions] from/to native representations.
+    ///
+    /// # Safety
+    ///
+    /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
+    /// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version
+    /// built for the same target platform.  For example, reconstructing an `OsString` from bytes sent
+    /// over the network or stored in a file will likely violate these safety rules.
+    ///
+    /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
+    /// split either immediately before or immediately after any valid non-empty UTF-8 substring.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// #![feature(os_str_bytes)]
+    ///
+    /// use std::ffi::OsStr;
+    ///
+    /// let os_str = OsStr::new("Mary had a little lamb");
+    /// let bytes = os_str.as_os_str_bytes();
+    /// let words = bytes.split(|b| *b == b' ');
+    /// let words: Vec<&OsStr> = words.map(|word| {
+    ///     // SAFETY:
+    ///     // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
+    ///     // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
+    ///     unsafe { OsStr::from_os_str_bytes_unchecked(word) }
+    /// }).collect();
+    /// ```
+    ///
+    /// [conversions]: super#conversions
+    #[inline]
+    #[unstable(feature = "os_str_bytes", issue = "111544")]
+    pub unsafe fn from_os_str_bytes_unchecked(bytes: Vec<u8>) -> Self {
+        OsString { inner: Buf::from_os_str_bytes_unchecked(bytes) }
+    }
+
     /// Converts to an [`OsStr`] slice.
     ///
     /// # Examples
@@ -159,6 +204,26 @@ impl OsString {
         self
     }
 
+    /// Converts the `OsString` into a byte slice.  To convert the byte slice back into an
+    /// `OsString`, use the [`OsStr::from_os_str_bytes_unchecked`] function.
+    ///
+    /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
+    /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
+    /// ASCII.
+    ///
+    /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should
+    /// be treated as opaque and only comparable within the same rust version built for the same
+    /// target platform.  For example, sending the bytes over the network or storing it in a file
+    /// will likely result in incompatible data.  See [`OsString`] for more encoding details
+    /// and [`std::ffi`] for platform-specific, specified conversions.
+    ///
+    /// [`std::ffi`]: crate::ffi
+    #[inline]
+    #[unstable(feature = "os_str_bytes", issue = "111544")]
+    pub fn into_os_str_bytes(self) -> Vec<u8> {
+        self.inner.into_os_str_bytes()
+    }
+
     /// Converts the `OsString` into a [`String`] if it contains valid Unicode data.
     ///
     /// On failure, ownership of the original `OsString` is returned.
diff --git a/library/std/src/os/raw/mod.rs b/library/std/src/os/raw/mod.rs
index 19d0ffb2e39..5b302e3c2c8 100644
--- a/library/std/src/os/raw/mod.rs
+++ b/library/std/src/os/raw/mod.rs
@@ -9,11 +9,6 @@ macro_rules! alias_core_ffi {
     ($($t:ident)*) => {$(
         #[stable(feature = "raw_os", since = "1.1.0")]
         #[doc = include_str!(concat!("../../../../core/src/ffi/", stringify!($t), ".md"))]
-        // Make this type alias appear cfg-dependent so that Clippy does not suggest
-        // replacing expressions like `0 as c_char` with `0_i8`/`0_u8`. This #[cfg(all())] can be
-        // removed after the false positive in https://github.com/rust-lang/rust-clippy/issues/8093
-        // is fixed.
-        #[cfg(all())]
         #[doc(cfg(all()))]
         pub type $t = core::ffi::$t;
     )*}
diff --git a/library/std/src/os/unix/fs.rs b/library/std/src/os/unix/fs.rs
index 7464ac6069c..88326aaf295 100644
--- a/library/std/src/os/unix/fs.rs
+++ b/library/std/src/os/unix/fs.rs
@@ -149,7 +149,36 @@ pub trait FileExt {
     /// Note that similar to [`File::write`], it is not an error to return a
     /// short write.
     ///
+    /// # Bug
+    /// On some systems, `write_at` utilises [`pwrite64`] to write to files.
+    /// However, this syscall has a [bug] where files opened with the `O_APPEND`
+    /// flag fail to respect the offset parameter, always appending to the end
+    /// of the file instead.
+    ///
+    /// It is possible to inadvertantly set this flag, like in the example below.
+    /// Therefore, it is important to be vigilant while changing options to mitigate
+    /// unexpected behaviour.
+    ///
+    /// ```no_run
+    /// use std::fs::File;
+    /// use std::io;
+    /// use std::os::unix::prelude::FileExt;
+    ///
+    /// fn main() -> io::Result<()> {
+    ///     // Open a file with the append option (sets the `O_APPEND` flag)
+    ///     let file = File::options().append(true).open("foo.txt")?;
+    ///
+    ///     // We attempt to write at offset 10; instead appended to EOF
+    ///     file.write_at(b"sushi", 10)?;
+    ///
+    ///     // foo.txt is 5 bytes long instead of 15
+    ///     Ok(())
+    /// }
+    /// ```
+    ///
     /// [`File::write`]: fs::File::write
+    /// [`pwrite64`]: https://man7.org/linux/man-pages/man2/pwrite.2.html
+    /// [bug]: https://man7.org/linux/man-pages/man2/pwrite.2.html#BUGS
     ///
     /// # Examples
     ///
@@ -159,7 +188,7 @@ pub trait FileExt {
     /// use std::os::unix::prelude::FileExt;
     ///
     /// fn main() -> io::Result<()> {
-    ///     let file = File::open("foo.txt")?;
+    ///     let file = File::create("foo.txt")?;
     ///
     ///     // We now write at the offset 10.
     ///     file.write_at(b"sushi", 10)?;
diff --git a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs
index f7333fd5a1f..463b0a27515 100644
--- a/library/std/src/sys/unix/os_str.rs
+++ b/library/std/src/sys/unix/os_str.rs
@@ -96,6 +96,16 @@ impl AsInner<[u8]> for Buf {
 }
 
 impl Buf {
+    #[inline]
+    pub fn into_os_str_bytes(self) -> Vec<u8> {
+        self.inner
+    }
+
+    #[inline]
+    pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
+        Self { inner: s }
+    }
+
     pub fn from_string(s: String) -> Buf {
         Buf { inner: s.into_bytes() }
     }
diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs
index 16c4f55c687..4708657a907 100644
--- a/library/std/src/sys/windows/os_str.rs
+++ b/library/std/src/sys/windows/os_str.rs
@@ -63,6 +63,16 @@ impl fmt::Display for Slice {
 }
 
 impl Buf {
+    #[inline]
+    pub fn into_os_str_bytes(self) -> Vec<u8> {
+        self.inner.into_bytes()
+    }
+
+    #[inline]
+    pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
+        Self { inner: Wtf8Buf::from_bytes_unchecked(s) }
+    }
+
     pub fn with_capacity(capacity: usize) -> Buf {
         Buf { inner: Wtf8Buf::with_capacity(capacity) }
     }
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index c9d3e13cf0c..195d175cc9b 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -182,6 +182,15 @@ impl Wtf8Buf {
         Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true }
     }
 
+    /// Creates a WTF-8 string from a WTF-8 byte vec.
+    ///
+    /// Since the byte vec is not checked for valid WTF-8, this functions is
+    /// marked unsafe.
+    #[inline]
+    pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf {
+        Wtf8Buf { bytes: value, is_known_utf8: false }
+    }
+
     /// Creates a WTF-8 string from a UTF-8 `String`.
     ///
     /// This takes ownership of the `String` and does not copy.
@@ -402,6 +411,12 @@ impl Wtf8Buf {
         self.bytes.truncate(new_len)
     }
 
+    /// Consumes the WTF-8 string and tries to convert it to a vec of bytes.
+    #[inline]
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.bytes
+    }
+
     /// Consumes the WTF-8 string and tries to convert it to UTF-8.
     ///
     /// This does not copy the data.